James Hunt has proposed merging lp:~jamesodhunt/upstart/bug-1357252 into 
lp:upstart.

Requested reviews:
  Upstart Reviewers (upstart-reviewers)
Related bugs:
  Bug #1357252 in upstart : "upstart can race with cgmanager when using 
remove-on-empty"
  https://bugs.launchpad.net/upstart/+bug/1357252

For more details, see:
https://code.launchpad.net/~jamesodhunt/upstart/bug-1357252/+merge/232680

* init/cgroup.c:
  - Removed nih_debug() and nih_warn() calls since, although
    useful, this output pollutes job logs when running in debug mode.
  - cgroup_clear(): New function to request cgroups be removed.
  - cgroup_create(): Don't mark cgroups 'remove-on-empty' since Upstart
    can race with cgmanager.
* init/job.c: job_last_process(): New helper function.
* init/job_process.c:
  - job_process_spawn_with_fd(): Request that the
    cgroup manager destroy all job cgroups after upstart has created
    required cgroups for last job process which avoids the
    'remove-on-empty' race (LP: #1357252).
  - job_process_error_handler(): Added handling for new
    JOB_PROCESS_ERROR_CGROUP_CLEAR error.
* init/job_process.h: JobProcessErrorType: Added new
  JOB_PROCESS_ERROR_CGROUP_CLEAR error.
* init/tests/test_job.c: test_job_last_process(): New test for
  job_last_process().
-- 
https://code.launchpad.net/~jamesodhunt/upstart/bug-1357252/+merge/232680
Your team Upstart Reviewers is requested to review the proposed merge of 
lp:~jamesodhunt/upstart/bug-1357252 into lp:upstart.
=== modified file 'ChangeLog'
--- ChangeLog	2014-08-14 11:19:43 +0000
+++ ChangeLog	2014-08-29 09:06:28 +0000
@@ -1,3 +1,24 @@
+2014-08-29  James Hunt  <[email protected]>
+
+	* init/cgroup.c:
+	  - Removed nih_debug() and nih_warn() calls since, although
+	    useful, this output pollutes job logs when running in debug mode.
+	  - cgroup_clear(): New function to request cgroups be removed.
+	  - cgroup_create(): Don't mark cgroups 'remove-on-empty' since Upstart
+	    can race with cgmanager.
+	* init/job.c: job_last_process(): New helper function.
+	* init/job_process.c:
+	  - job_process_spawn_with_fd(): Request that the
+	    cgroup manager destroy all job cgroups after upstart has created
+	    required cgroups for last job process which avoids the
+	    'remove-on-empty' race (LP: #1357252).
+	  - job_process_error_handler(): Added handling for new
+	    JOB_PROCESS_ERROR_CGROUP_CLEAR error.
+	* init/job_process.h: JobProcessErrorType: Added new
+	  JOB_PROCESS_ERROR_CGROUP_CLEAR error.
+	* init/tests/test_job.c: test_job_last_process(): New test for
+	  job_last_process().
+
 2014-08-14  James Hunt  <[email protected]>
 
 	* init/control.c: Disallow modifying system jobs via SetEnv,

=== modified file 'init/cgroup.c'
--- init/cgroup.c	2014-07-01 13:17:52 +0000
+++ init/cgroup.c	2014-08-29 09:06:28 +0000
@@ -302,6 +302,57 @@
 }
 
 /**
+ * cgroup_clear:
+ *
+ * @cgroups: list of CGroup objects,
+ *
+ * Remove all cgroups associated with this job.
+ *
+ * Should be called by the _last_ job process to avoid racing with the
+ * cgroup manager which may remove an empty cgroup before the latest job
+ * process (which needs the cgroup) has been spawned.
+ *
+ * Returns: TRUE on success, FALSE on raised error.
+ **/
+int
+cgroup_clear (NihList *cgroups)
+{
+	nih_assert (cgroups);
+	nih_assert (cgroup_manager);
+
+	if (! cgroup_support_enabled ())
+		return TRUE;
+
+	if (NIH_LIST_EMPTY (cgroups))
+		return TRUE;
+
+	NIH_LIST_FOREACH (cgroups, iter) {
+		CGroup *cgroup = (CGroup *)iter;
+
+		NIH_LIST_FOREACH (&cgroup->names, iter2) {
+			CGroupName   *cgname = (CGroupName *)iter2;
+			char         *cgpath;
+			int           ret;
+
+			cgpath = cgname->expanded ? cgname->expanded : cgname->name;
+
+			/* Get the cgroup manager to delete the cgroup once no more job
+			 * processes remain in it.
+			 */
+			ret = cgmanager_remove_on_empty_sync (NULL,
+					cgroup_manager,
+					cgroup->controller,
+					cgpath);
+
+			if (ret < 0) 
+				return FALSE;
+		}
+	}
+
+	return TRUE;
+}
+
+/**
  * cgroup_setup:
  *
  * @cgroups: list of CGroup objects,
@@ -316,7 +367,10 @@
  * Returns: TRUE on success, FALSE on raised error.
  **/
 int
-cgroup_setup (NihList *cgroups, char * const *env, uid_t uid, gid_t gid)
+cgroup_setup (NihList       *cgroups,
+	      char * const  *env,
+	      uid_t          uid,
+	      gid_t          gid)
 {
 	const char       *upstart_job = NULL;
 	const char       *upstart_instance = NULL;
@@ -1004,8 +1058,6 @@
 	/* Drop initial reference now the proxy holds one */
 	dbus_connection_unref (connection);
 
-	nih_debug ("Connected to cgroup manager");
-
 	return 0;
 }
 
@@ -1021,8 +1073,6 @@
 	nih_assert (connection);
 	nih_assert (cgroup_manager_address);
 
-	nih_warn (_("Disconnected from cgroup manager"));
-
 	cgroup_manager = NULL;
 	nih_free (cgroup_manager_address);
 	cgroup_manager_address = NULL;
@@ -1075,15 +1125,11 @@
 						   UPSTART_CGROUP_ROOT,
 						   pid);
 
-
 		if (ret < 0)
 			return FALSE;
-
-		nih_debug ("Moved pid %d to root of '%s' controller cgroup",
-			   pid, controller);
 	}
 
-	/* Ask cgmanager to create the cgroup */
+	/* Ask the cgroup manager to create the cgroup */
 	ret = cgmanager_create_sync (NULL,
 			cgroup_manager,
 			controller,
@@ -1093,40 +1139,6 @@
 	if (ret < 0)
 		return FALSE;
 
-	nih_debug ("%s '%s' controller cgroup '%s'",
-			! existed ? "Created" : "Using existing",
-			controller, path);
-
-	/* Get the cgroup manager to delete the cgroup once no more job
-	 * processes remain in it. Never mind if auto-deletion occurs between
-	 * a jobs processes since the group will be recreated anyway by
-	 * cgroup_create().
-	 *
-	 * This may seem incorrect since if we create the group,
-	 * then mark it to be auto-removed when empty, surely
-	 * it will be immediately deleted? However, the way this works
-	 * is that the group will be deleted once it has _become_ empty
-	 * (having at some time *not* been empty).
-	 *
-	 * The logic of using auto-delete is slightly inefficient
-	 * in terms of cgmanager usage, but is hugely beneficial to
-	 * Upstart since it avoids having to store details of which
-	 * groups were created by jobs and also avoids the complexity of
-	 * the child (which is responsible for creating the cgroups)
-	 * pass back these details asynchronously to the parent to avoid
-	 * it blocking.
-	 */
-	ret = cgmanager_remove_on_empty_sync (NULL,
-			cgroup_manager,
-			controller,
-			path);
-
-	if (ret < 0)
-		return FALSE;
-
-	nih_debug ("Set remove on empty for '%s' controller cgroup '%s'",
-			controller, path);
-
 	return TRUE;
 }
 
@@ -1162,9 +1174,6 @@
 	if (ret < 0)
 		return FALSE;
 
-	nih_debug ("Moved pid %d to '%s' controller cgroup '%s'",
-			pid, controller, path);
-
 	return TRUE;
 }
 
@@ -1371,9 +1380,6 @@
 			return FALSE;
 	}
 
-	nih_debug ("Applied cgroup settings to '%s' controller cgroup '%s'",
-			controller, path);
-
 	return TRUE;
 }
 
@@ -1455,8 +1461,5 @@
 	if (ret < 0)
 		return FALSE;
 
-	nih_debug ("Changed ownership of '%s' controller cgroup '%s'",
-			controller, path);
-
 	return TRUE;
 }

=== modified file 'init/cgroup.h'
--- init/cgroup.h	2014-07-01 13:11:34 +0000
+++ init/cgroup.h	2014-08-29 09:06:28 +0000
@@ -172,6 +172,8 @@
 int cgroup_manager_available (void)
 	__attribute__ ((warn_unused_result));
 
+int cgroup_clear (NihList *cgroups);
+
 int cgroup_setup (NihList *cgroups, char * const *env,
 		uid_t uid, gid_t gid)
 	__attribute__ ((warn_unused_result));

=== modified file 'init/job.c'
--- init/job.c	2014-07-10 16:05:04 +0000
+++ init/job.c	2014-08-29 09:06:28 +0000
@@ -2683,3 +2683,33 @@
 		nih_assert_not_reached ();
 	}
 }
+
+#ifdef ENABLE_CGROUPS
+/**
+ * job_last_process:
+ *
+ * @job: job,
+ * @process: process.
+ *
+ * Returns: TRUE if the last defined process for @job is @process,
+ *  else FALSE.
+ **/
+int
+job_last_process (const Job *job, ProcessType process)
+{
+	ProcessType  i;
+	ProcessType  last = PROCESS_INVALID;
+
+	nih_assert (job);
+	nih_assert (process >= PROCESS_MAIN);
+	nih_assert (process < PROCESS_LAST);
+
+	for (i = 0; i < PROCESS_LAST; i++) {
+		if (job->class->process[i])
+			last = i;
+	}
+
+	return last == process ? TRUE : FALSE;
+}
+#endif /* ENABLE_CGROUPS */
+

=== modified file 'init/job.h'
--- init/job.h	2014-06-02 20:29:33 +0000
+++ init/job.h	2014-08-29 09:06:28 +0000
@@ -300,6 +300,13 @@
 int job_needs_cgroups (const Job *job)
 	__attribute__ ((warn_unused_result));
 
+#ifdef ENABLE_CGROUPS
+
+int job_last_process (const Job *job, ProcessType process)
+	__attribute__ ((warn_unused_result));
+
+#endif /* ENABLE_CGROUPS */
+
 NIH_END_EXTERN
 
 #endif /* INIT_JOB_H */

=== modified file 'init/job_process.c'
--- init/job_process.c	2014-07-10 16:07:57 +0000
+++ init/job_process.c	2014-08-29 09:06:28 +0000
@@ -890,6 +890,16 @@
 				job_process_error_abort (fds[1], JOB_PROCESS_ERROR_CGROUP_SETUP, 0);
 			}
 
+			/* If spawning the last process for the job,
+			 * arrange for the cgroup manager to destroy
+			 * all job cgroups relating to this job once all
+			 * job processes have completed.
+			 */
+			if (job_last_process (job, process)) {
+				if (! cgroup_clear (&job->class->cgroups)) {
+					job_process_error_abort (fds[1], JOB_PROCESS_ERROR_CGROUP_CLEAR, 0);
+				}
+			}
 		}
 #endif /* ENABLE_CGROUPS */
 
@@ -1248,6 +1258,11 @@
 				  err, _("unable to enter cgroup: %s"),
 				  strerror (err->errnum)));
 		break;
+	case JOB_PROCESS_ERROR_CGROUP_CLEAR:
+		err->error.message = NIH_MUST (nih_sprintf (
+				  err, _("unable to mark cgroups for removal: %s"),
+				  strerror (err->errnum)));
+		break;
 
 	default:
 		nih_assert_not_reached ();
@@ -1806,7 +1821,6 @@
 		nih_assert_not_reached ();
 	}
 
-
 	/* Cancel any timer trying to kill the job, since it's just
 	 * died.  We could do this inside the main process block above, but
 	 * leaving it here for now means we can use the timer for any

=== modified file 'init/job_process.h'
--- init/job_process.h	2014-07-09 16:12:52 +0000
+++ init/job_process.h	2014-08-29 09:06:28 +0000
@@ -98,7 +98,8 @@
 	JOB_PROCESS_ERROR_SECURITY,
 	JOB_PROCESS_ERROR_CGROUP_MGR_CONNECT,
 	JOB_PROCESS_ERROR_CGROUP_SETUP,
-	JOB_PROCESS_ERROR_CGROUP_ENTER
+	JOB_PROCESS_ERROR_CGROUP_ENTER,
+	JOB_PROCESS_ERROR_CGROUP_CLEAR
 } JobProcessErrorType;
 
 /**

=== modified file 'init/tests/test_job.c'
--- init/tests/test_job.c	2014-05-21 21:12:08 +0000
+++ init/tests/test_job.c	2014-08-29 09:06:28 +0000
@@ -7790,6 +7790,159 @@
 	NIH_OPTION_LAST
 };
 
+void
+test_job_last_process (void)
+{
+	ConfFile    *file;
+	ConfSource  *source;
+	JobClass    *class;
+	Job         *job;
+	int          i;
+	int          ret;
+
+	TEST_FUNCTION ("job_last_process");
+
+	nih_error_init ();
+	conf_init ();
+	job_class_init ();
+
+	TEST_HASH_EMPTY (job_classes);
+
+	source = conf_source_new (NULL, "/tmp", CONF_JOB_DIR);
+	TEST_NE_P (source, NULL);
+
+	file = conf_file_new (source, "/tmp/test");
+	TEST_NE_P (file, NULL);
+
+	class = file->job = job_class_new (NULL, "test", NULL);
+	TEST_NE_P (class, NULL);
+
+	job = job_new (class, "");
+	TEST_NE_P (job, NULL);
+
+	TEST_HASH_EMPTY (job_classes);
+	TEST_TRUE (job_class_consider (class));
+	TEST_HASH_NOT_EMPTY (job_classes);
+
+	/*******************************************************************/
+	TEST_FEATURE ("no job processes");
+
+	for (i = 0; i < PROCESS_LAST; i++) {
+		ret = job_last_process (job, i);
+		TEST_FALSE (ret);
+	}
+
+	/*******************************************************************/
+	TEST_FEATURE ("first job process");
+
+	class->process[PROCESS_MAIN] = process_new (class);
+	TEST_NE_P (class->process[PROCESS_MAIN], NULL);
+
+	for (i = 0; i < PROCESS_LAST; i++) {
+		ret = job_last_process (job, i);
+		if (i == PROCESS_MAIN) {
+			TEST_TRUE (ret);
+		} else {
+			TEST_FALSE (ret);
+		}
+	}
+
+	nih_free (class->process[PROCESS_MAIN]);
+	class->process[PROCESS_MAIN] = NULL;
+
+	/*******************************************************************/
+	TEST_FEATURE ("last job process");
+
+	class->process[PROCESS_SECURITY] = process_new (class);
+	TEST_NE_P (class->process[PROCESS_SECURITY], NULL);
+
+	for (i = 0; i < PROCESS_LAST; i++) {
+		ret = job_last_process (job, i);
+		if (i == PROCESS_SECURITY) {
+			TEST_TRUE (ret);
+		} else {
+			TEST_FALSE (ret);
+		}
+	}
+
+	nih_free (class->process[PROCESS_SECURITY]);
+	class->process[PROCESS_SECURITY] = NULL;
+
+	/*******************************************************************/
+	TEST_FEATURE ("PROCESS_PRE_STOP job process");
+
+	class->process[PROCESS_PRE_STOP] = process_new (class);
+	TEST_NE_P (class->process[PROCESS_PRE_STOP], NULL);
+
+	for (i = 0; i < PROCESS_LAST; i++) {
+		ret = job_last_process (job, i);
+		if (i == PROCESS_PRE_STOP) {
+			TEST_TRUE (ret);
+		} else {
+			TEST_FALSE (ret);
+		}
+	}
+
+	nih_free (class->process[PROCESS_PRE_STOP]);
+	class->process[PROCESS_PRE_STOP] = NULL;
+
+	/*******************************************************************/
+	TEST_FEATURE ("all job processes set");
+
+	for (i = 0; i < PROCESS_LAST; i++) {
+		class->process[i] = process_new (class);
+		TEST_NE_P (class->process[i], NULL);
+	}
+
+	for (i = 0; i < PROCESS_LAST; i++) {
+		ret = job_last_process (job, i);
+		if (i == PROCESS_SECURITY) {
+			TEST_TRUE (ret);
+		} else {
+			TEST_FALSE (ret);
+		}
+	}
+
+	for (i = 0; i < PROCESS_LAST; i++) {
+		nih_free (class->process[i]);
+		class->process[i] = NULL;
+	}
+
+	/*******************************************************************/
+	TEST_FEATURE ("all job processes set except PROCESS_SECURITY");
+
+	for (i = 0; i < PROCESS_SECURITY; i++) {
+		class->process[i] = process_new (class);
+		TEST_NE_P (class->process[i], NULL);
+	}
+
+	for (i = 0; i < PROCESS_LAST; i++) {
+		ret = job_last_process (job, i);
+		if (i == PROCESS_POST_STOP) {
+			TEST_TRUE (ret);
+		} else {
+			TEST_FALSE (ret);
+		}
+	}
+
+	for (i = 0; i < PROCESS_SECURITY; i++) {
+		nih_free (class->process[i]);
+		class->process[i] = NULL;
+	}
+
+	/***********************************************************/
+	/* clean up */
+
+	nih_free (conf_sources);
+	nih_free (job_classes);
+
+	conf_sources = NULL;
+	job_classes = NULL;
+
+	conf_init ();
+	job_class_init ();
+}
+
 
 int
 main (int   argc,
@@ -7841,6 +7994,7 @@
 	test_deserialise_ptrace ();
 
 	test_job_find ();
+	test_job_last_process ();
 
 	return 0;
 }

-- 
upstart-devel mailing list
[email protected]
Modify settings or unsubscribe at: 
https://lists.ubuntu.com/mailman/listinfo/upstart-devel

Reply via email to