Merge authors: James Hunt (jamesodhunt) Related merge proposals: https://code.launchpad.net/~jamesodhunt/upstart/bug-1227212/+merge/187844 proposed by: James Hunt (jamesodhunt) review: Approve - Dmitrijs Ledkovs (xnox) ------------------------------------------------------------ revno: 1534 [merge] committer: Dmitrijs Ledkovs <[email protected]> branch nick: upstart timestamp: Fri 2013-09-27 15:27:39 +0100 message: Merge lp:~jamesodhunt/upstart/bug-1227212 modified: ChangeLog init/event.c init/job_process.c init/quiesce.c init/quiesce.h test/test_util_common.c test/test_util_common.h util/tests/test_initctl.c
-- lp:upstart https://code.launchpad.net/~upstart-devel/upstart/trunk Your team Upstart Reviewers is subscribed to branch lp:upstart. To unsubscribe from this branch go to https://code.launchpad.net/~upstart-devel/upstart/trunk/+edit-subscription
=== modified file 'ChangeLog' --- ChangeLog 2013-09-13 04:44:55 +0000 +++ ChangeLog 2013-09-26 16:33:07 +0000 @@ -1,3 +1,38 @@ +2013-09-26 James Hunt <[email protected]> + + * init/event.c: event_pending_handle_jobs(): Force quiesce when all job + instances have finished to speed session shutdown. + * init/job_process.c: job_process_jobs_running(): Only consider job + instances with associated pids to avoid abstract jobs confusing the + shutdown. + * init/quiesce.c: + - quiesce(): Optimise session shutdown + - Skip wait phase if no jobs care about the 'session-end' event + (LP: #1227212). + - Stop already running instances if other jobs care about + 'session-end' to allow the already-running jobs to shut down in + parallel with the newly-started session-end jobs. + - quiesce_wait_callback(): + - Simplify logic. + - Improve wait phase checks to detect earliest time to finalise. + - quiesce_finalise(): Display time to shutdown. + - quiesce_complete(): New function to force final shutdown phase. + - quiesce_event_match(): New function to determine if any jobs + 'start on' contains a particular event. + - quiesce_in_progress(): Determine if shutdown is being handled. + * test/test_util_common.c: + - _start_upstart(): Call get_upstart_binary() rather than relying on + UPSTART_BINARY define. + - start_upstart_common(): Remove '--no-startup-event' as this is now + needed by a test. + - get_upstart_binary(): Assert that file exists. + - file_exists(): New helper function. + * test/test_util_common.h: Typo and prototype. + * util/tests/test_initctl.c: test_quiesce(): + - New test "session shutdown: one long-running job which starts on + startup". + - Adjusted expected shutdown times. + 2013-09-12 Steve Langasek <[email protected]> * configure.ac: === modified file 'init/event.c' --- init/event.c 2013-06-04 16:51:55 +0000 +++ init/event.c 2013-09-26 16:33:07 +0000 @@ -44,6 +44,7 @@ #include "blocked.h" #include "control.h" #include "errors.h" +#include "quiesce.h" #include "com.ubuntu.Upstart.h" @@ -299,6 +300,8 @@ static void event_pending_handle_jobs (Event *event) { + int empty = TRUE; + nih_assert (event != NULL); job_class_init (); @@ -432,6 +435,23 @@ event_operator_reset (class->start_on); } } + + /* Determine if any job instances remain */ + NIH_HASH_FOREACH_SAFE (job_classes, iter) { + JobClass *class = (JobClass *)iter; + + NIH_HASH_FOREACH_SAFE (class->instances, job_iter) { + empty = FALSE; + break; + } + + if (! empty) + break; + } + + /* If no instances remain, force quiesce to finish */ + if (empty && quiesce_in_progress ()) + quiesce_complete (); } === modified file 'init/job_process.c' --- init/job_process.c 2013-07-01 21:05:15 +0000 +++ init/job_process.c 2013-09-26 16:33:07 +0000 @@ -1259,7 +1259,9 @@ /** * job_process_jobs_running: * - * Determine if any jobs are running. + * Determine if any jobs are running. Note that simply checking if class + * instances exist is insufficient: since this call is used for shutdown + * abstract jobs must not be able to block the shutdown. * * Returns: TRUE if jobs are still running, else FALSE. **/ @@ -1271,8 +1273,17 @@ NIH_HASH_FOREACH (job_classes, iter) { JobClass *class = (JobClass *)iter; - NIH_HASH_FOREACH (class->instances, job_iter) - return TRUE; + NIH_HASH_FOREACH (class->instances, job_iter) { + Job *job = (Job *)job_iter; + nih_local char *cmd = NULL; + int i; + nih_local char *pids = NULL; + + for (i = 0; i < PROCESS_LAST; i++) { + if (job->pid[i]) + return TRUE; + } + } } return FALSE; === modified file 'init/quiesce.c' --- init/quiesce.c 2013-07-31 09:28:48 +0000 +++ init/quiesce.c 2013-09-26 16:33:07 +0000 @@ -48,9 +48,16 @@ static QuiescePhase quiesce_phase = QUIESCE_PHASE_NOT_QUIESCED; /** + * quiesce_reason: + * + * Human-readable string denoting what triggered the quiesce. + **/ +static char *quiesce_reason = NULL; + +/** * max_kill_timeout: * - * Maxiumum kill_timout value calculated from all running jobs used to + * Maxiumum kill_timeout value calculated from all running jobs used to * determine how long to wait before exiting. **/ static time_t max_kill_timeout = 0; @@ -62,6 +69,24 @@ **/ static time_t quiesce_phase_time = 0; +/** + * quiesce_start_time: + * + * Time quiesce commenced. + **/ +static time_t quiesce_start_time = 0; + +/** + * session_end_jobs: + * + * TRUE if any job specifies a 'start on' including SESSION_END_EVENT. + * + **/ +static int session_end_jobs = FALSE; + +static int quiesce_event_match (Event *event) + __attribute__ ((warn_unused_result)); + /* External definitions */ extern int disable_respawn; @@ -76,7 +101,7 @@ quiesce (QuiesceRequester requester) { nih_local char **env = NULL; - const char *reason; + Event *event; job_class_init (); @@ -98,12 +123,12 @@ ? QUIESCE_PHASE_KILL : QUIESCE_PHASE_WAIT; - reason = (requester == QUIESCE_REQUESTER_SESSION) + quiesce_reason = (requester == QUIESCE_REQUESTER_SESSION) ? _("logout") : _("shutdown"); - nih_info (_("Quiescing due to %s request"), reason); + nih_info (_("Quiescing due to %s request"), quiesce_reason); - quiesce_phase_time = time (NULL); + quiesce_start_time = quiesce_phase_time = time (NULL); /* Stop existing jobs from respawning */ disable_respawn = TRUE; @@ -116,11 +141,43 @@ env = NIH_MUST (nih_str_array_new (NULL)); NIH_MUST (environ_set (&env, NULL, NULL, TRUE, - "TYPE=%s", reason)); - - NIH_MUST (event_new (NULL, SESSION_END_EVENT, env)); - - if (requester == QUIESCE_REQUESTER_SYSTEM) { + "TYPE=%s", quiesce_reason)); + + event = NIH_MUST (event_new (NULL, SESSION_END_EVENT, env)); + + /* Check if any jobs care about the session end event. If not, + * the wait phase can be avoided entirely resulting in a much + * faster shutdown. + * + * Note that simply checking if running instances exist is not + * sufficient since if a job cares about the session end event, + * it won't yet have started but needs to be given a chance to + * run. + */ + if (quiesce_phase == QUIESCE_PHASE_WAIT) { + + session_end_jobs = quiesce_event_match (event); + + if (session_end_jobs) { + /* Some as-yet unscheduled jobs care about the + * session end event. They will be started the + * next time through the main loop and will be + * waited for (hence the quiesce phase is not + * changed). + * + * However, already-running jobs *can* be stopped + * at this time since by definition they do not + * care about the session end event and may just + * as well die now to avoid slowing the shutdown. + */ + job_process_stop_all (); + } else { + nih_debug ("Skipping wait phase"); + quiesce_phase = QUIESCE_PHASE_KILL; + } + } + + if (quiesce_phase == QUIESCE_PHASE_KILL) { /* We'll attempt to wait for this long, but system * policy may prevent it such that we just get killed * and job processes reparented to PID 1. @@ -153,32 +210,34 @@ nih_assert (timer); nih_assert (quiesce_phase_time); + nih_assert (quiesce_requester != QUIESCE_REQUESTER_INVALID); now = time (NULL); - nih_assert (quiesce_requester != QUIESCE_REQUESTER_INVALID); - - if (quiesce_requester == QUIESCE_REQUESTER_SYSTEM) { - nih_assert (quiesce_phase == QUIESCE_PHASE_KILL); + if (quiesce_phase == QUIESCE_PHASE_KILL) { + nih_assert (max_kill_timeout); if ((now - quiesce_phase_time) > max_kill_timeout) - goto out; + goto timed_out; } else if (quiesce_phase == QUIESCE_PHASE_WAIT) { - - if ((now - quiesce_phase_time) > QUIESCE_DEFAULT_JOB_RUNTIME) { + int timed_out = 0; + + timed_out = ((now - quiesce_phase_time) >= QUIESCE_DEFAULT_JOB_RUNTIME); + + if (timed_out + || (session_end_jobs && ! job_process_jobs_running ()) + || ! job_process_jobs_running ()) { + quiesce_phase = QUIESCE_PHASE_KILL; /* reset for new phase */ quiesce_phase_time = time (NULL); max_kill_timeout = job_class_max_kill_timeout (); + job_process_stop_all (); } - } else if (quiesce_phase == QUIESCE_PHASE_KILL) { - - if ((now - quiesce_phase_time) > max_kill_timeout) - goto out; } else { nih_assert_not_reached (); } @@ -188,9 +247,10 @@ return; +timed_out: + quiesce_show_slow_jobs (); + out: - quiesce_show_slow_jobs (); - /* Note that we might skip the kill phase for the session * requestor if no jobs are actually running at this point. */ @@ -237,7 +297,100 @@ void quiesce_finalise (void) { + static int finalising = FALSE; + time_t diff; + + nih_assert (quiesce_start_time); nih_assert (quiesce_phase == QUIESCE_PHASE_CLEANUP); + if (finalising) + return; + + finalising = TRUE; + + diff = time (NULL) - quiesce_start_time; + + nih_info (_("Quiesce %s sequence took %s%d second%s"), + quiesce_reason, + ! (int)diff ? "<" : "", + (int)diff ? (int)diff : 1, + diff <= 1 ? "" : "s"); + nih_main_loop_exit (0); + +} + +/** + * quiesce_complete: + * + * Force quiesce phase to finish. + **/ +void +quiesce_complete (void) +{ + quiesce_phase = QUIESCE_PHASE_CLEANUP; + + quiesce_finalise (); +} + +/** + * quiesce_event_match: + * @event: event. + * + * Identify if any jobs _may_ start when the session ends. + * + * A simple heuristic is used such that there is no guarantee that the + * jobs entire start condition will be satisfied at session-end. + * + * Returns: TRUE if any class specifies @event in its start + * condition, else FALSE. + **/ +static int +quiesce_event_match (Event *event) +{ + nih_assert (event); + + job_class_init (); + + NIH_HASH_FOREACH (job_classes, iter) { + JobClass *class = (JobClass *)iter; + + if (! class->start_on) + continue; + + /* Note that only the jobs start on condition is + * relevant. + */ + NIH_TREE_FOREACH_POST (&class->start_on->node, iter) { + EventOperator *oper = (EventOperator *)iter; + + switch (oper->type) { + case EVENT_OR: + case EVENT_AND: + break; + case EVENT_MATCH: + /* Job may attempt to start as the session ends */ + if (event_operator_match (oper, event, NULL)) + return TRUE; + break; + default: + nih_assert_not_reached (); + } + } + } + + return FALSE; +} + +/** + * quiesce_in_progress: + * + * Determine if shutdown is in progress. + * + * Returns: TRUE if quiesce is in progress, else FALSE. + **/ +int +quiesce_in_progress (void) +{ + return quiesce_phase != QUIESCE_PHASE_NOT_QUIESCED; } === modified file 'init/quiesce.h' --- init/quiesce.h 2013-02-08 16:15:23 +0000 +++ init/quiesce.h 2013-09-26 16:33:07 +0000 @@ -70,6 +70,9 @@ void quiesce_wait_callback (void *data, NihTimer *timer); void quiesce_show_slow_jobs (void); void quiesce_finalise (void); +void quiesce_complete (void); +int quiesce_in_progress (void) + __attribute__ ((warn_unused_result)); NIH_END_EXTERN === modified file 'test/test_util_common.c' --- test/test_util_common.c 2013-07-17 14:18:42 +0000 +++ test/test_util_common.c 2013-09-26 16:33:07 +0000 @@ -384,7 +384,7 @@ argv = NIH_MUST (nih_str_array_new (NULL)); NIH_MUST (nih_str_array_add (&argv, NULL, NULL, - UPSTART_BINARY)); + get_upstart_binary ())); if (args) NIH_MUST (nih_str_array_append (&argv, NULL, NULL, args)); @@ -447,9 +447,6 @@ } NIH_MUST (nih_str_array_add (&args, NULL, NULL, - "--no-startup-event")); - - NIH_MUST (nih_str_array_add (&args, NULL, NULL, "--no-sessions")); NIH_MUST (nih_str_array_add (&args, NULL, NULL, @@ -561,7 +558,11 @@ const char * get_upstart_binary (void) { - return UPSTART_BINARY; + static const char *upstart_binary = UPSTART_BINARY; + + TEST_TRUE (file_exists (upstart_binary)); + + return upstart_binary; } const char * @@ -740,3 +741,21 @@ return new; } + +/** + * file_exists: + * @path: file to check. + * + * Determine if specified file exists. + * + * Returns: TRUE if @path exists, else FALSE. + **/ +int +file_exists (const char *path) +{ + struct stat st; + + nih_assert (path); + + return ! stat (path, &st); +} === modified file 'test/test_util_common.h' --- test/test_util_common.h 2013-07-17 14:18:42 +0000 +++ test/test_util_common.h 2013-09-26 16:33:07 +0000 @@ -10,7 +10,7 @@ #define BUFFER_SIZE 1024 /** - * TEST_QUIESCE_WAIT_PHASE: + * TEST_EXIT_TIME: * * Maximum time we expect upstart to wait in the QUIESCE_PHASE_WAIT * phase. @@ -733,4 +733,7 @@ const char *from, const char *to) __attribute__ ((warn_unused_result)); +int file_exists (const char *path) + __attribute__ ((warn_unused_result)); + #endif /* TEST_UTIL_COMMON_H */ === modified file 'util/tests/test_initctl.c' --- util/tests/test_initctl.c 2013-09-05 16:19:06 +0000 +++ util/tests/test_initctl.c 2013-09-26 16:33:07 +0000 @@ -11590,6 +11590,49 @@ DELETE_FILE (confdir, "long-running.conf"); /*******************************************************************/ + TEST_FEATURE ("session shutdown: one long-running job which starts on startup"); + + CREATE_FILE (confdir, "startup.conf", + "start on startup\n" + "exec sleep 999"); + + start_upstart_common (&upstart_pid, TRUE, confdir, logdir, NULL); + + upstart = upstart_open (NULL); + TEST_NE_P (upstart, NULL); + + /* Should be running */ + assert0 (kill (upstart_pid, 0)); + + job_pid = job_to_pid ("startup"); + TEST_NE (job_pid, -1); + + /* Force reset */ + test_user_mode = FALSE; + + /* Trigger session shutdown */ + assert0 (upstart_end_session_sync (NULL, upstart)); + + /* Session Init should end very quickly since there will be no + * wait phase. + */ + TEST_EQ (timed_waitpid (upstart_pid, TEST_QUIESCE_KILL_PHASE), upstart_pid); + + /* Should not now be running */ + TEST_EQ (kill (upstart_pid, 0), -1); + TEST_EQ (errno, ESRCH); + + session_file = NIH_MUST (nih_sprintf (NULL, "%s/upstart/sessions/%d.session", + sessiondir, (int)upstart_pid)); + unlink (session_file); + + /* pid should no longer exist */ + TEST_EQ (kill (job_pid, SIGKILL), -1); + TEST_EQ (errno, ESRCH); + + DELETE_FILE (confdir, "startup.conf"); + + /*******************************************************************/ TEST_FEATURE ("session shutdown: one long-running job which ignores SIGTERM"); CREATE_FILE (confdir, "long-running-term.conf", @@ -11668,7 +11711,7 @@ /* Trigger session shutdown */ assert0 (upstart_end_session_sync (NULL, upstart)); - TEST_EQ (timed_waitpid (upstart_pid, TEST_QUIESCE_KILL_PHASE), upstart_pid); + TEST_EQ (timed_waitpid (upstart_pid, TEST_QUIESCE_TOTAL_WAIT_TIME), upstart_pid); /* Should not now be running */ TEST_EQ (kill (upstart_pid, 0), -1); @@ -11731,7 +11774,7 @@ /* Trigger session shutdown */ assert0 (upstart_end_session_sync (NULL, upstart)); - TEST_EQ (timed_waitpid (upstart_pid, TEST_QUIESCE_KILL_PHASE), upstart_pid); + TEST_EQ (timed_waitpid (upstart_pid, TEST_QUIESCE_TOTAL_WAIT_TIME), upstart_pid); /* Should not now be running */ TEST_EQ (kill (upstart_pid, 0), -1);
-- upstart-devel mailing list [email protected] Modify settings or unsubscribe at: https://lists.ubuntu.com/mailman/listinfo/upstart-devel
