Merge authors:
  James Hunt (jamesodhunt)
Related merge proposals:
  https://code.launchpad.net/~jamesodhunt/upstart/bug-1227212/+merge/187844
  proposed by: James Hunt (jamesodhunt)
  review: Approve - Dmitrijs Ledkovs (xnox)
------------------------------------------------------------
revno: 1534 [merge]
committer: Dmitrijs Ledkovs <[email protected]>
branch nick: upstart
timestamp: Fri 2013-09-27 15:27:39 +0100
message:
  Merge lp:~jamesodhunt/upstart/bug-1227212
modified:
  ChangeLog
  init/event.c
  init/job_process.c
  init/quiesce.c
  init/quiesce.h
  test/test_util_common.c
  test/test_util_common.h
  util/tests/test_initctl.c


--
lp:upstart
https://code.launchpad.net/~upstart-devel/upstart/trunk

Your team Upstart Reviewers is subscribed to branch lp:upstart.
To unsubscribe from this branch go to 
https://code.launchpad.net/~upstart-devel/upstart/trunk/+edit-subscription
=== modified file 'ChangeLog'
--- ChangeLog	2013-09-13 04:44:55 +0000
+++ ChangeLog	2013-09-26 16:33:07 +0000
@@ -1,3 +1,38 @@
+2013-09-26  James Hunt  <[email protected]>
+
+	* init/event.c: event_pending_handle_jobs(): Force quiesce when all job
+	  instances have finished to speed session shutdown.
+	* init/job_process.c: job_process_jobs_running(): Only consider job
+	  instances with associated pids to avoid abstract jobs confusing the
+	  shutdown.
+	* init/quiesce.c:
+	  - quiesce(): Optimise session shutdown 
+	    - Skip wait phase if no jobs care about the 'session-end' event
+	      (LP: #1227212).
+	    - Stop already running instances if other jobs care about
+	      'session-end' to allow the already-running jobs to shut down in
+	       parallel with the newly-started session-end jobs.
+	  - quiesce_wait_callback():
+	    - Simplify logic.
+	    - Improve wait phase checks to detect earliest time to finalise.
+	  - quiesce_finalise(): Display time to shutdown.
+	  - quiesce_complete(): New function to force final shutdown phase.
+	  - quiesce_event_match(): New function to determine if any jobs
+	    'start on' contains a particular event.
+	  - quiesce_in_progress(): Determine if shutdown is being handled.
+	* test/test_util_common.c:
+	  - _start_upstart(): Call get_upstart_binary() rather than relying on
+	    UPSTART_BINARY define.
+	  - start_upstart_common(): Remove '--no-startup-event' as this is now
+	    needed by a test.
+	  - get_upstart_binary(): Assert that file exists.
+	  - file_exists(): New helper function.
+	* test/test_util_common.h: Typo and prototype.
+	* util/tests/test_initctl.c: test_quiesce():
+	  - New test "session shutdown: one long-running job which starts on
+	    startup".
+	  - Adjusted expected shutdown times.
+
 2013-09-12  Steve Langasek  <[email protected]>
 
 	* configure.ac:

=== modified file 'init/event.c'
--- init/event.c	2013-06-04 16:51:55 +0000
+++ init/event.c	2013-09-26 16:33:07 +0000
@@ -44,6 +44,7 @@
 #include "blocked.h"
 #include "control.h"
 #include "errors.h"
+#include "quiesce.h"
 
 #include "com.ubuntu.Upstart.h"
 
@@ -299,6 +300,8 @@
 static void
 event_pending_handle_jobs (Event *event)
 {
+	int  empty = TRUE;
+
 	nih_assert (event != NULL);
 
 	job_class_init ();
@@ -432,6 +435,23 @@
 			event_operator_reset (class->start_on);
 		}
 	}
+
+	/* Determine if any job instances remain */
+	NIH_HASH_FOREACH_SAFE (job_classes, iter) {
+		JobClass *class = (JobClass *)iter;
+
+		NIH_HASH_FOREACH_SAFE (class->instances, job_iter) {
+			empty = FALSE;
+			break;
+		}
+
+		if (! empty)
+			break;
+	}
+
+	/* If no instances remain, force quiesce to finish */
+	if (empty && quiesce_in_progress ())
+		quiesce_complete ();
 }
 
 

=== modified file 'init/job_process.c'
--- init/job_process.c	2013-07-01 21:05:15 +0000
+++ init/job_process.c	2013-09-26 16:33:07 +0000
@@ -1259,7 +1259,9 @@
 /**
  * job_process_jobs_running:
  *
- * Determine if any jobs are running.
+ * Determine if any jobs are running. Note that simply checking if class
+ * instances exist is insufficient: since this call is used for shutdown
+ * abstract jobs must not be able to block the shutdown.
  *
  * Returns: TRUE if jobs are still running, else FALSE.
  **/
@@ -1271,8 +1273,17 @@
 	NIH_HASH_FOREACH (job_classes, iter) {
 		JobClass *class = (JobClass *)iter;
 
-		NIH_HASH_FOREACH (class->instances, job_iter)
-			return TRUE;
+		NIH_HASH_FOREACH (class->instances, job_iter) {
+			Job *job = (Job *)job_iter;
+			nih_local char *cmd = NULL;
+			int i;
+			nih_local char *pids = NULL;
+
+			for (i = 0; i < PROCESS_LAST; i++) {
+				if (job->pid[i])
+					return TRUE;
+			}
+		}
 	}
 
 	return FALSE;

=== modified file 'init/quiesce.c'
--- init/quiesce.c	2013-07-31 09:28:48 +0000
+++ init/quiesce.c	2013-09-26 16:33:07 +0000
@@ -48,9 +48,16 @@
 static QuiescePhase quiesce_phase = QUIESCE_PHASE_NOT_QUIESCED;
 
 /**
+ * quiesce_reason:
+ *
+ * Human-readable string denoting what triggered the quiesce.
+ **/
+static char *quiesce_reason = NULL;
+
+/**
  * max_kill_timeout:
  *
- * Maxiumum kill_timout value calculated from all running jobs used to
+ * Maxiumum kill_timeout value calculated from all running jobs used to
  * determine how long to wait before exiting.
  **/
 static time_t max_kill_timeout = 0;
@@ -62,6 +69,24 @@
  **/
 static time_t quiesce_phase_time = 0;
 
+/**
+ * quiesce_start_time:
+ *
+ * Time quiesce commenced.
+ **/
+static time_t quiesce_start_time = 0;
+
+/**
+ * session_end_jobs:
+ *
+ * TRUE if any job specifies a 'start on' including SESSION_END_EVENT.
+ *
+ **/
+static int session_end_jobs = FALSE;
+
+static int quiesce_event_match (Event *event)
+	__attribute__ ((warn_unused_result));
+
 /* External definitions */
 extern int disable_respawn;
 
@@ -76,7 +101,7 @@
 quiesce (QuiesceRequester requester)
 {
 	nih_local char  **env = NULL;
-	const char       *reason;
+	Event            *event;
 
 	job_class_init ();
 
@@ -98,12 +123,12 @@
 		? QUIESCE_PHASE_KILL
 		: QUIESCE_PHASE_WAIT;
 
-	reason = (requester == QUIESCE_REQUESTER_SESSION)
+	quiesce_reason = (requester == QUIESCE_REQUESTER_SESSION)
 		? _("logout") : _("shutdown");
 
-	nih_info (_("Quiescing due to %s request"), reason);
+	nih_info (_("Quiescing due to %s request"), quiesce_reason);
 
-	quiesce_phase_time = time (NULL);
+	quiesce_start_time = quiesce_phase_time = time (NULL);
 
 	/* Stop existing jobs from respawning */
 	disable_respawn = TRUE;
@@ -116,11 +141,43 @@
 	env = NIH_MUST (nih_str_array_new (NULL));
 
 	NIH_MUST (environ_set (&env, NULL, NULL, TRUE,
-				"TYPE=%s", reason));
-
-	NIH_MUST (event_new (NULL, SESSION_END_EVENT, env));
-
-	if (requester == QUIESCE_REQUESTER_SYSTEM) {
+				"TYPE=%s", quiesce_reason));
+
+	event = NIH_MUST (event_new (NULL, SESSION_END_EVENT, env));
+
+	/* Check if any jobs care about the session end event. If not,
+	 * the wait phase can be avoided entirely resulting in a much
+	 * faster shutdown.
+	 *
+	 * Note that simply checking if running instances exist is not
+	 * sufficient since if a job cares about the session end event,
+	 * it won't yet have started but needs to be given a chance to
+	 * run.
+	 */
+	if (quiesce_phase == QUIESCE_PHASE_WAIT) {
+
+		session_end_jobs = quiesce_event_match (event);
+
+		if (session_end_jobs) {
+			/* Some as-yet unscheduled jobs care about the
+			 * session end event. They will be started the
+			 * next time through the main loop and will be
+			 * waited for (hence the quiesce phase is not
+			 * changed).
+			 *
+			 * However, already-running jobs *can* be stopped
+			 * at this time since by definition they do not
+			 * care about the session end event and may just
+			 * as well die now to avoid slowing the shutdown.
+			 */
+			job_process_stop_all ();
+		} else {
+			nih_debug ("Skipping wait phase");
+			quiesce_phase = QUIESCE_PHASE_KILL;
+		}
+	}
+
+	if (quiesce_phase == QUIESCE_PHASE_KILL) {
 		/* We'll attempt to wait for this long, but system
 		 * policy may prevent it such that we just get killed
 		 * and job processes reparented to PID 1.
@@ -153,32 +210,34 @@
 
 	nih_assert (timer);
 	nih_assert (quiesce_phase_time);
+	nih_assert (quiesce_requester != QUIESCE_REQUESTER_INVALID);
 
 	now = time (NULL);
 
-	nih_assert (quiesce_requester != QUIESCE_REQUESTER_INVALID);
-
-	if (quiesce_requester == QUIESCE_REQUESTER_SYSTEM) {
-		nih_assert (quiesce_phase == QUIESCE_PHASE_KILL);
+	if (quiesce_phase == QUIESCE_PHASE_KILL) {
+		nih_assert (max_kill_timeout);
 
 		if ((now - quiesce_phase_time) > max_kill_timeout)
-			goto out;
+			goto timed_out;
 
 	} else if (quiesce_phase == QUIESCE_PHASE_WAIT) {
-
-		if ((now - quiesce_phase_time) > QUIESCE_DEFAULT_JOB_RUNTIME) {
+		int  timed_out = 0;
+
+		timed_out = ((now - quiesce_phase_time) >= QUIESCE_DEFAULT_JOB_RUNTIME);
+
+		if (timed_out
+			|| (session_end_jobs && ! job_process_jobs_running ())
+			|| ! job_process_jobs_running ()) {
+
 			quiesce_phase = QUIESCE_PHASE_KILL;
 
 			/* reset for new phase */
 			quiesce_phase_time = time (NULL);
 
 			max_kill_timeout = job_class_max_kill_timeout ();
+
 			job_process_stop_all ();
 		}
-	} else if (quiesce_phase == QUIESCE_PHASE_KILL) {
-
-		if ((now - quiesce_phase_time) > max_kill_timeout)
-			goto out;
 	} else {
 		nih_assert_not_reached ();
 	}
@@ -188,9 +247,10 @@
 
 	return;
 
+timed_out:
+	quiesce_show_slow_jobs ();
+
 out:
-	quiesce_show_slow_jobs ();
-
 	/* Note that we might skip the kill phase for the session
 	 * requestor if no jobs are actually running at this point.
 	 */
@@ -237,7 +297,100 @@
 void
 quiesce_finalise (void)
 {
+	static int  finalising = FALSE;
+	time_t      diff;
+
+	nih_assert (quiesce_start_time);
 	nih_assert (quiesce_phase == QUIESCE_PHASE_CLEANUP);
 
+	if (finalising)
+		return;
+
+	finalising = TRUE;
+
+	diff = time (NULL) - quiesce_start_time;
+
+	nih_info (_("Quiesce %s sequence took %s%d second%s"),
+			quiesce_reason,
+			! (int)diff ? "<" : "",
+			(int)diff ? (int)diff : 1,
+			diff <= 1 ? "" : "s");
+
 	nih_main_loop_exit (0);
+
+}
+
+/**
+ * quiesce_complete:
+ *
+ * Force quiesce phase to finish.
+ **/
+void
+quiesce_complete (void)
+{
+	quiesce_phase = QUIESCE_PHASE_CLEANUP;
+
+	quiesce_finalise ();
+}
+
+/**
+ * quiesce_event_match:
+ * @event: event.
+ *
+ * Identify if any jobs _may_ start when the session ends.
+ *
+ * A simple heuristic is used such that there is no guarantee that the
+ * jobs entire start condition will be satisfied at session-end.
+ *
+ * Returns: TRUE if any class specifies @event in its start
+ * condition, else FALSE.
+ **/
+static int
+quiesce_event_match (Event *event)
+{
+	nih_assert (event);
+
+	job_class_init ();
+
+	NIH_HASH_FOREACH (job_classes, iter) {
+		JobClass *class = (JobClass *)iter;
+
+		if (! class->start_on)
+			continue;
+
+		/* Note that only the jobs start on condition is
+		 * relevant.
+		 */
+		NIH_TREE_FOREACH_POST (&class->start_on->node, iter) {
+			EventOperator *oper = (EventOperator *)iter;
+
+			switch (oper->type) {
+			case EVENT_OR:
+			case EVENT_AND:
+				break;
+			case EVENT_MATCH:
+				/* Job may attempt to start as the session ends */
+				if (event_operator_match (oper, event, NULL))
+					return TRUE;
+				break;
+			default:
+				nih_assert_not_reached ();
+			}
+		}
+	}
+
+	return FALSE;
+}
+
+/**
+ * quiesce_in_progress:
+ *
+ * Determine if shutdown is in progress.
+ *
+ * Returns: TRUE if quiesce is in progress, else FALSE.
+ **/
+int
+quiesce_in_progress (void)
+{
+	return quiesce_phase != QUIESCE_PHASE_NOT_QUIESCED;
 }

=== modified file 'init/quiesce.h'
--- init/quiesce.h	2013-02-08 16:15:23 +0000
+++ init/quiesce.h	2013-09-26 16:33:07 +0000
@@ -70,6 +70,9 @@
 void    quiesce_wait_callback  (void *data, NihTimer *timer);
 void    quiesce_show_slow_jobs (void);
 void    quiesce_finalise       (void);
+void    quiesce_complete       (void);
+int     quiesce_in_progress    (void)
+	__attribute__ ((warn_unused_result));
 
 NIH_END_EXTERN
 

=== modified file 'test/test_util_common.c'
--- test/test_util_common.c	2013-07-17 14:18:42 +0000
+++ test/test_util_common.c	2013-09-26 16:33:07 +0000
@@ -384,7 +384,7 @@
 	argv = NIH_MUST (nih_str_array_new (NULL));
 
 	NIH_MUST (nih_str_array_add (&argv, NULL, NULL,
-				UPSTART_BINARY));
+				get_upstart_binary ()));
 
 	if (args)
 		NIH_MUST (nih_str_array_append (&argv, NULL, NULL, args));
@@ -447,9 +447,6 @@
 	}
 
 	NIH_MUST (nih_str_array_add (&args, NULL, NULL,
-				"--no-startup-event"));
-
-	NIH_MUST (nih_str_array_add (&args, NULL, NULL,
 				"--no-sessions"));
 
 	NIH_MUST (nih_str_array_add (&args, NULL, NULL,
@@ -561,7 +558,11 @@
 const char *
 get_upstart_binary (void)
 {
-	return UPSTART_BINARY;
+	static const char *upstart_binary = UPSTART_BINARY;
+
+	TEST_TRUE (file_exists (upstart_binary));
+
+	return upstart_binary;
 }
 
 const char *
@@ -740,3 +741,21 @@
 
 	return new;
 }
+
+/**
+ * file_exists:
+ * @path: file to check.
+ *
+ * Determine if specified file exists.
+ *
+ * Returns: TRUE if @path exists, else FALSE.
+ **/
+int
+file_exists (const char *path)
+{
+	struct stat  st;
+
+	nih_assert (path);
+
+	return ! stat (path, &st);
+}

=== modified file 'test/test_util_common.h'
--- test/test_util_common.h	2013-07-17 14:18:42 +0000
+++ test/test_util_common.h	2013-09-26 16:33:07 +0000
@@ -10,7 +10,7 @@
 #define BUFFER_SIZE 1024
 
 /**
- * TEST_QUIESCE_WAIT_PHASE:
+ * TEST_EXIT_TIME:
  *
  * Maximum time we expect upstart to wait in the QUIESCE_PHASE_WAIT
  * phase.
@@ -733,4 +733,7 @@
 			  const char *from, const char *to)
 	__attribute__ ((warn_unused_result));
 
+int file_exists (const char *path)
+	__attribute__ ((warn_unused_result));
+
 #endif /* TEST_UTIL_COMMON_H */

=== modified file 'util/tests/test_initctl.c'
--- util/tests/test_initctl.c	2013-09-05 16:19:06 +0000
+++ util/tests/test_initctl.c	2013-09-26 16:33:07 +0000
@@ -11590,6 +11590,49 @@
 	DELETE_FILE (confdir, "long-running.conf");
 
 	/*******************************************************************/
+	TEST_FEATURE ("session shutdown: one long-running job which starts on startup");
+
+	CREATE_FILE (confdir, "startup.conf",
+			"start on startup\n"
+			"exec sleep 999");
+
+	start_upstart_common (&upstart_pid, TRUE, confdir, logdir, NULL);
+
+	upstart = upstart_open (NULL);
+	TEST_NE_P (upstart, NULL);
+
+	/* Should be running */
+	assert0 (kill (upstart_pid, 0));
+
+	job_pid = job_to_pid ("startup");
+	TEST_NE (job_pid, -1);
+
+	/* Force reset */
+	test_user_mode = FALSE;
+
+	/* Trigger session shutdown */
+	assert0 (upstart_end_session_sync (NULL, upstart));
+
+	/* Session Init should end very quickly since there will be no
+	 * wait phase.
+	 */
+	TEST_EQ (timed_waitpid (upstart_pid, TEST_QUIESCE_KILL_PHASE), upstart_pid);
+
+	/* Should not now be running */
+	TEST_EQ (kill (upstart_pid, 0), -1);
+	TEST_EQ (errno, ESRCH);
+
+	session_file = NIH_MUST (nih_sprintf (NULL, "%s/upstart/sessions/%d.session",
+				sessiondir, (int)upstart_pid));
+	unlink (session_file);
+
+	/* pid should no longer exist */
+	TEST_EQ (kill (job_pid, SIGKILL), -1);
+	TEST_EQ (errno, ESRCH);
+
+	DELETE_FILE (confdir, "startup.conf");
+
+	/*******************************************************************/
 	TEST_FEATURE ("session shutdown: one long-running job which ignores SIGTERM");
 
 	CREATE_FILE (confdir, "long-running-term.conf",
@@ -11668,7 +11711,7 @@
 	/* Trigger session shutdown */
 	assert0 (upstart_end_session_sync (NULL, upstart));
 
-	TEST_EQ (timed_waitpid (upstart_pid, TEST_QUIESCE_KILL_PHASE), upstart_pid);
+	TEST_EQ (timed_waitpid (upstart_pid, TEST_QUIESCE_TOTAL_WAIT_TIME), upstart_pid);
 
 	/* Should not now be running */
 	TEST_EQ (kill (upstart_pid, 0), -1);
@@ -11731,7 +11774,7 @@
 	/* Trigger session shutdown */
 	assert0 (upstart_end_session_sync (NULL, upstart));
 
-	TEST_EQ (timed_waitpid (upstart_pid, TEST_QUIESCE_KILL_PHASE), upstart_pid);
+	TEST_EQ (timed_waitpid (upstart_pid, TEST_QUIESCE_TOTAL_WAIT_TIME), upstart_pid);
 
 	/* Should not now be running */
 	TEST_EQ (kill (upstart_pid, 0), -1);

-- 
upstart-devel mailing list
[email protected]
Modify settings or unsubscribe at: 
https://lists.ubuntu.com/mailman/listinfo/upstart-devel

Reply via email to