James Hunt has proposed merging lp:~jamesodhunt/upstart/bug-1357252 into lp:upstart.
Requested reviews: Upstart Reviewers (upstart-reviewers) Related bugs: Bug #1357252 in upstart : "upstart can race with cgmanager when using remove-on-empty" https://bugs.launchpad.net/upstart/+bug/1357252 For more details, see: https://code.launchpad.net/~jamesodhunt/upstart/bug-1357252/+merge/232680 * init/cgroup.c: - Removed nih_debug() and nih_warn() calls since, although useful, this output pollutes job logs when running in debug mode. - cgroup_clear(): New function to request cgroups be removed. - cgroup_create(): Don't mark cgroups 'remove-on-empty' since Upstart can race with cgmanager. * init/job.c: job_last_process(): New helper function. * init/job_process.c: - job_process_spawn_with_fd(): Request that the cgroup manager destroy all job cgroups after upstart has created required cgroups for last job process which avoids the 'remove-on-empty' race (LP: #1357252). - job_process_error_handler(): Added handling for new JOB_PROCESS_ERROR_CGROUP_CLEAR error. * init/job_process.h: JobProcessErrorType: Added new JOB_PROCESS_ERROR_CGROUP_CLEAR error. * init/tests/test_job.c: test_job_last_process(): New test for job_last_process(). -- https://code.launchpad.net/~jamesodhunt/upstart/bug-1357252/+merge/232680 Your team Upstart Reviewers is requested to review the proposed merge of lp:~jamesodhunt/upstart/bug-1357252 into lp:upstart.
=== modified file 'ChangeLog' --- ChangeLog 2014-08-14 11:19:43 +0000 +++ ChangeLog 2014-08-29 09:06:28 +0000 @@ -1,3 +1,24 @@ +2014-08-29 James Hunt <[email protected]> + + * init/cgroup.c: + - Removed nih_debug() and nih_warn() calls since, although + useful, this output pollutes job logs when running in debug mode. + - cgroup_clear(): New function to request cgroups be removed. + - cgroup_create(): Don't mark cgroups 'remove-on-empty' since Upstart + can race with cgmanager. + * init/job.c: job_last_process(): New helper function. + * init/job_process.c: + - job_process_spawn_with_fd(): Request that the + cgroup manager destroy all job cgroups after upstart has created + required cgroups for last job process which avoids the + 'remove-on-empty' race (LP: #1357252). + - job_process_error_handler(): Added handling for new + JOB_PROCESS_ERROR_CGROUP_CLEAR error. + * init/job_process.h: JobProcessErrorType: Added new + JOB_PROCESS_ERROR_CGROUP_CLEAR error. + * init/tests/test_job.c: test_job_last_process(): New test for + job_last_process(). + 2014-08-14 James Hunt <[email protected]> * init/control.c: Disallow modifying system jobs via SetEnv, === modified file 'init/cgroup.c' --- init/cgroup.c 2014-07-01 13:17:52 +0000 +++ init/cgroup.c 2014-08-29 09:06:28 +0000 @@ -302,6 +302,57 @@ } /** + * cgroup_clear: + * + * @cgroups: list of CGroup objects, + * + * Remove all cgroups associated with this job. + * + * Should be called by the _last_ job process to avoid racing with the + * cgroup manager which may remove an empty cgroup before the latest job + * process (which needs the cgroup) has been spawned. + * + * Returns: TRUE on success, FALSE on raised error. + **/ +int +cgroup_clear (NihList *cgroups) +{ + nih_assert (cgroups); + nih_assert (cgroup_manager); + + if (! cgroup_support_enabled ()) + return TRUE; + + if (NIH_LIST_EMPTY (cgroups)) + return TRUE; + + NIH_LIST_FOREACH (cgroups, iter) { + CGroup *cgroup = (CGroup *)iter; + + NIH_LIST_FOREACH (&cgroup->names, iter2) { + CGroupName *cgname = (CGroupName *)iter2; + char *cgpath; + int ret; + + cgpath = cgname->expanded ? cgname->expanded : cgname->name; + + /* Get the cgroup manager to delete the cgroup once no more job + * processes remain in it. + */ + ret = cgmanager_remove_on_empty_sync (NULL, + cgroup_manager, + cgroup->controller, + cgpath); + + if (ret < 0) + return FALSE; + } + } + + return TRUE; +} + +/** * cgroup_setup: * * @cgroups: list of CGroup objects, @@ -316,7 +367,10 @@ * Returns: TRUE on success, FALSE on raised error. **/ int -cgroup_setup (NihList *cgroups, char * const *env, uid_t uid, gid_t gid) +cgroup_setup (NihList *cgroups, + char * const *env, + uid_t uid, + gid_t gid) { const char *upstart_job = NULL; const char *upstart_instance = NULL; @@ -1004,8 +1058,6 @@ /* Drop initial reference now the proxy holds one */ dbus_connection_unref (connection); - nih_debug ("Connected to cgroup manager"); - return 0; } @@ -1021,8 +1073,6 @@ nih_assert (connection); nih_assert (cgroup_manager_address); - nih_warn (_("Disconnected from cgroup manager")); - cgroup_manager = NULL; nih_free (cgroup_manager_address); cgroup_manager_address = NULL; @@ -1075,15 +1125,11 @@ UPSTART_CGROUP_ROOT, pid); - if (ret < 0) return FALSE; - - nih_debug ("Moved pid %d to root of '%s' controller cgroup", - pid, controller); } - /* Ask cgmanager to create the cgroup */ + /* Ask the cgroup manager to create the cgroup */ ret = cgmanager_create_sync (NULL, cgroup_manager, controller, @@ -1093,40 +1139,6 @@ if (ret < 0) return FALSE; - nih_debug ("%s '%s' controller cgroup '%s'", - ! existed ? "Created" : "Using existing", - controller, path); - - /* Get the cgroup manager to delete the cgroup once no more job - * processes remain in it. Never mind if auto-deletion occurs between - * a jobs processes since the group will be recreated anyway by - * cgroup_create(). - * - * This may seem incorrect since if we create the group, - * then mark it to be auto-removed when empty, surely - * it will be immediately deleted? However, the way this works - * is that the group will be deleted once it has _become_ empty - * (having at some time *not* been empty). - * - * The logic of using auto-delete is slightly inefficient - * in terms of cgmanager usage, but is hugely beneficial to - * Upstart since it avoids having to store details of which - * groups were created by jobs and also avoids the complexity of - * the child (which is responsible for creating the cgroups) - * pass back these details asynchronously to the parent to avoid - * it blocking. - */ - ret = cgmanager_remove_on_empty_sync (NULL, - cgroup_manager, - controller, - path); - - if (ret < 0) - return FALSE; - - nih_debug ("Set remove on empty for '%s' controller cgroup '%s'", - controller, path); - return TRUE; } @@ -1162,9 +1174,6 @@ if (ret < 0) return FALSE; - nih_debug ("Moved pid %d to '%s' controller cgroup '%s'", - pid, controller, path); - return TRUE; } @@ -1371,9 +1380,6 @@ return FALSE; } - nih_debug ("Applied cgroup settings to '%s' controller cgroup '%s'", - controller, path); - return TRUE; } @@ -1455,8 +1461,5 @@ if (ret < 0) return FALSE; - nih_debug ("Changed ownership of '%s' controller cgroup '%s'", - controller, path); - return TRUE; } === modified file 'init/cgroup.h' --- init/cgroup.h 2014-07-01 13:11:34 +0000 +++ init/cgroup.h 2014-08-29 09:06:28 +0000 @@ -172,6 +172,8 @@ int cgroup_manager_available (void) __attribute__ ((warn_unused_result)); +int cgroup_clear (NihList *cgroups); + int cgroup_setup (NihList *cgroups, char * const *env, uid_t uid, gid_t gid) __attribute__ ((warn_unused_result)); === modified file 'init/job.c' --- init/job.c 2014-07-10 16:05:04 +0000 +++ init/job.c 2014-08-29 09:06:28 +0000 @@ -2683,3 +2683,33 @@ nih_assert_not_reached (); } } + +#ifdef ENABLE_CGROUPS +/** + * job_last_process: + * + * @job: job, + * @process: process. + * + * Returns: TRUE if the last defined process for @job is @process, + * else FALSE. + **/ +int +job_last_process (const Job *job, ProcessType process) +{ + ProcessType i; + ProcessType last = PROCESS_INVALID; + + nih_assert (job); + nih_assert (process >= PROCESS_MAIN); + nih_assert (process < PROCESS_LAST); + + for (i = 0; i < PROCESS_LAST; i++) { + if (job->class->process[i]) + last = i; + } + + return last == process ? TRUE : FALSE; +} +#endif /* ENABLE_CGROUPS */ + === modified file 'init/job.h' --- init/job.h 2014-06-02 20:29:33 +0000 +++ init/job.h 2014-08-29 09:06:28 +0000 @@ -300,6 +300,13 @@ int job_needs_cgroups (const Job *job) __attribute__ ((warn_unused_result)); +#ifdef ENABLE_CGROUPS + +int job_last_process (const Job *job, ProcessType process) + __attribute__ ((warn_unused_result)); + +#endif /* ENABLE_CGROUPS */ + NIH_END_EXTERN #endif /* INIT_JOB_H */ === modified file 'init/job_process.c' --- init/job_process.c 2014-07-10 16:07:57 +0000 +++ init/job_process.c 2014-08-29 09:06:28 +0000 @@ -890,6 +890,16 @@ job_process_error_abort (fds[1], JOB_PROCESS_ERROR_CGROUP_SETUP, 0); } + /* If spawning the last process for the job, + * arrange for the cgroup manager to destroy + * all job cgroups relating to this job once all + * job processes have completed. + */ + if (job_last_process (job, process)) { + if (! cgroup_clear (&job->class->cgroups)) { + job_process_error_abort (fds[1], JOB_PROCESS_ERROR_CGROUP_CLEAR, 0); + } + } } #endif /* ENABLE_CGROUPS */ @@ -1248,6 +1258,11 @@ err, _("unable to enter cgroup: %s"), strerror (err->errnum))); break; + case JOB_PROCESS_ERROR_CGROUP_CLEAR: + err->error.message = NIH_MUST (nih_sprintf ( + err, _("unable to mark cgroups for removal: %s"), + strerror (err->errnum))); + break; default: nih_assert_not_reached (); @@ -1806,7 +1821,6 @@ nih_assert_not_reached (); } - /* Cancel any timer trying to kill the job, since it's just * died. We could do this inside the main process block above, but * leaving it here for now means we can use the timer for any === modified file 'init/job_process.h' --- init/job_process.h 2014-07-09 16:12:52 +0000 +++ init/job_process.h 2014-08-29 09:06:28 +0000 @@ -98,7 +98,8 @@ JOB_PROCESS_ERROR_SECURITY, JOB_PROCESS_ERROR_CGROUP_MGR_CONNECT, JOB_PROCESS_ERROR_CGROUP_SETUP, - JOB_PROCESS_ERROR_CGROUP_ENTER + JOB_PROCESS_ERROR_CGROUP_ENTER, + JOB_PROCESS_ERROR_CGROUP_CLEAR } JobProcessErrorType; /** === modified file 'init/tests/test_job.c' --- init/tests/test_job.c 2014-05-21 21:12:08 +0000 +++ init/tests/test_job.c 2014-08-29 09:06:28 +0000 @@ -7790,6 +7790,159 @@ NIH_OPTION_LAST }; +void +test_job_last_process (void) +{ + ConfFile *file; + ConfSource *source; + JobClass *class; + Job *job; + int i; + int ret; + + TEST_FUNCTION ("job_last_process"); + + nih_error_init (); + conf_init (); + job_class_init (); + + TEST_HASH_EMPTY (job_classes); + + source = conf_source_new (NULL, "/tmp", CONF_JOB_DIR); + TEST_NE_P (source, NULL); + + file = conf_file_new (source, "/tmp/test"); + TEST_NE_P (file, NULL); + + class = file->job = job_class_new (NULL, "test", NULL); + TEST_NE_P (class, NULL); + + job = job_new (class, ""); + TEST_NE_P (job, NULL); + + TEST_HASH_EMPTY (job_classes); + TEST_TRUE (job_class_consider (class)); + TEST_HASH_NOT_EMPTY (job_classes); + + /*******************************************************************/ + TEST_FEATURE ("no job processes"); + + for (i = 0; i < PROCESS_LAST; i++) { + ret = job_last_process (job, i); + TEST_FALSE (ret); + } + + /*******************************************************************/ + TEST_FEATURE ("first job process"); + + class->process[PROCESS_MAIN] = process_new (class); + TEST_NE_P (class->process[PROCESS_MAIN], NULL); + + for (i = 0; i < PROCESS_LAST; i++) { + ret = job_last_process (job, i); + if (i == PROCESS_MAIN) { + TEST_TRUE (ret); + } else { + TEST_FALSE (ret); + } + } + + nih_free (class->process[PROCESS_MAIN]); + class->process[PROCESS_MAIN] = NULL; + + /*******************************************************************/ + TEST_FEATURE ("last job process"); + + class->process[PROCESS_SECURITY] = process_new (class); + TEST_NE_P (class->process[PROCESS_SECURITY], NULL); + + for (i = 0; i < PROCESS_LAST; i++) { + ret = job_last_process (job, i); + if (i == PROCESS_SECURITY) { + TEST_TRUE (ret); + } else { + TEST_FALSE (ret); + } + } + + nih_free (class->process[PROCESS_SECURITY]); + class->process[PROCESS_SECURITY] = NULL; + + /*******************************************************************/ + TEST_FEATURE ("PROCESS_PRE_STOP job process"); + + class->process[PROCESS_PRE_STOP] = process_new (class); + TEST_NE_P (class->process[PROCESS_PRE_STOP], NULL); + + for (i = 0; i < PROCESS_LAST; i++) { + ret = job_last_process (job, i); + if (i == PROCESS_PRE_STOP) { + TEST_TRUE (ret); + } else { + TEST_FALSE (ret); + } + } + + nih_free (class->process[PROCESS_PRE_STOP]); + class->process[PROCESS_PRE_STOP] = NULL; + + /*******************************************************************/ + TEST_FEATURE ("all job processes set"); + + for (i = 0; i < PROCESS_LAST; i++) { + class->process[i] = process_new (class); + TEST_NE_P (class->process[i], NULL); + } + + for (i = 0; i < PROCESS_LAST; i++) { + ret = job_last_process (job, i); + if (i == PROCESS_SECURITY) { + TEST_TRUE (ret); + } else { + TEST_FALSE (ret); + } + } + + for (i = 0; i < PROCESS_LAST; i++) { + nih_free (class->process[i]); + class->process[i] = NULL; + } + + /*******************************************************************/ + TEST_FEATURE ("all job processes set except PROCESS_SECURITY"); + + for (i = 0; i < PROCESS_SECURITY; i++) { + class->process[i] = process_new (class); + TEST_NE_P (class->process[i], NULL); + } + + for (i = 0; i < PROCESS_LAST; i++) { + ret = job_last_process (job, i); + if (i == PROCESS_POST_STOP) { + TEST_TRUE (ret); + } else { + TEST_FALSE (ret); + } + } + + for (i = 0; i < PROCESS_SECURITY; i++) { + nih_free (class->process[i]); + class->process[i] = NULL; + } + + /***********************************************************/ + /* clean up */ + + nih_free (conf_sources); + nih_free (job_classes); + + conf_sources = NULL; + job_classes = NULL; + + conf_init (); + job_class_init (); +} + int main (int argc, @@ -7841,6 +7994,7 @@ test_deserialise_ptrace (); test_job_find (); + test_job_last_process (); return 0; }
-- upstart-devel mailing list [email protected] Modify settings or unsubscribe at: https://lists.ubuntu.com/mailman/listinfo/upstart-devel
