[lxc-devel] [lxc/lxc] 6a5640: Update get_item test after the lxc.mount.entry fix
Branch: refs/heads/master Home: https://github.com/lxc/lxc Commit: 6a5640665cde64664cc1a4d5e97a334905a0e860 https://github.com/lxc/lxc/commit/6a5640665cde64664cc1a4d5e97a334905a0e860 Author: Stéphane Graber Date: 2015-12-09 (Wed, 09 Dec 2015) Changed paths: M src/tests/get_item.c Log Message: --- Update get_item test after the lxc.mount.entry fix Signed-off-by: Stéphane Graber Acked-by: Serge E. Hallyn ___ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel
Re: [lxc-devel] [PATCH] Update get_item test after the lxc.mount.entry fix
Quoting Stéphane Graber (stgra...@ubuntu.com): > Signed-off-by: Stéphane Graber (*&$)(**$)(*#$*#( Acked-by: Serge E. Hallyn > --- > src/tests/get_item.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/src/tests/get_item.c b/src/tests/get_item.c > index 270ced9..9a9ae59 100644 > --- a/src/tests/get_item.c > +++ b/src/tests/get_item.c > @@ -340,8 +340,8 @@ int main(int argc, char *argv[]) > ret = 1; > goto out; > } > - if (!c->clear_config_item(c, "lxc.mount.entries")) { > - fprintf(stderr, "%d: failed clearing lxc.mount.entries\n", > __LINE__); > + if (!c->clear_config_item(c, "lxc.mount.entry")) { > + fprintf(stderr, "%d: failed clearing lxc.mount.entry\n", > __LINE__); > ret = 1; > goto out; > } > -- > 1.9.1 > > ___ > lxc-devel mailing list > lxc-devel@lists.linuxcontainers.org > http://lists.linuxcontainers.org/listinfo/lxc-devel ___ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel
[lxc-devel] [PATCH] Update get_item test after the lxc.mount.entry fix
Signed-off-by: Stéphane Graber --- src/tests/get_item.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/get_item.c b/src/tests/get_item.c index 270ced9..9a9ae59 100644 --- a/src/tests/get_item.c +++ b/src/tests/get_item.c @@ -340,8 +340,8 @@ int main(int argc, char *argv[]) ret = 1; goto out; } - if (!c->clear_config_item(c, "lxc.mount.entries")) { - fprintf(stderr, "%d: failed clearing lxc.mount.entries\n", __LINE__); + if (!c->clear_config_item(c, "lxc.mount.entry")) { + fprintf(stderr, "%d: failed clearing lxc.mount.entry\n", __LINE__); ret = 1; goto out; } -- 1.9.1 ___ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel
[lxc-devel] [PATCH] Add concise explanations
- explain functions in list.h - let lxc_list_len() return size_t instead of int Signed-off-by: Christian Brauner --- I'm working on some stuff that employs struct lxc_list. I already previously found that list.h lacked some short documentation so here it is. Adding this documentation might be considered tmi but I think that it really eases transitioning into a codebase a lot to have a few lines and common idioms even for quite simple stuff such as circular linked lists here. src/lxc/list.h | 61 -- 1 file changed, 59 insertions(+), 2 deletions(-) diff --git a/src/lxc/list.h b/src/lxc/list.h index 2d8a2a8..3458b86 100644 --- a/src/lxc/list.h +++ b/src/lxc/list.h @@ -32,42 +32,71 @@ struct lxc_list { #define lxc_init_list(l) { .next = l, .prev = l } +/* + * Iterate through an lxc list. An example for an idiom would be: + * + * struct lxc_list *iterator; + * type *tmp; // where "type" can be an int, char * etc. + * lxc_list_for_each(iterator, list) { + * tmp = iterator->elem; + *// Do stuff with tmp. + * } + * free(iterator); + */ #define lxc_list_for_each(__iterator, __list) \ for (__iterator = (__list)->next; \ __iterator != __list; \ __iterator = __iterator->next) +/* + * Iterate safely through an lxc list. An example for an appropriate use case + * would be: + * + * struct lxc_list *iterator; + * lxc_list_for_each_safe(iterator, list, list->next) { + * tmp = iterator->elem; + *// Do stuff with tmp. + * } + * free(iterator); + */ #define lxc_list_for_each_safe(__iterator, __list, __next) \ for (__iterator = (__list)->next, __next = __iterator->next;\ __iterator != __list; \ __iterator = __next, __next = __next->next) +/* Initialize list. */ static inline void lxc_list_init(struct lxc_list *list) { list->elem = NULL; list->next = list->prev = list; } +/* Add an element to a list. See lxc_list_add() and lxc_list_add_tail() for an + * idiom. 
*/ static inline void lxc_list_add_elem(struct lxc_list *list, void *elem) { list->elem = elem; } +/* Retrieve first element of list. */ static inline void *lxc_list_first_elem(struct lxc_list *list) { return list->next->elem; } +/* Retrieve last element of list. */ static inline void *lxc_list_last_elem(struct lxc_list *list) { return list->prev->elem; } +/* Determine if list is empty. */ static inline int lxc_list_empty(struct lxc_list *list) { return list == list->next; } +/* Workhorse to be called from lxc_list_add() and lxc_list_add_tail(). */ static inline void __lxc_list_add(struct lxc_list *new, struct lxc_list *prev, struct lxc_list *next) @@ -78,17 +107,44 @@ static inline void __lxc_list_add(struct lxc_list *new, prev->next = new; } +/* + * Idiom to add an element to the beginning of an lxc list: + * + * struct lxc_list *tmp = malloc(sizeof(*tmp)); + * if (tmp == NULL) + * return 1; + * lxc_list_add_elem(tmp, elem); + * lxc_list_add(list, tmp); + */ static inline void lxc_list_add(struct lxc_list *head, struct lxc_list *list) { __lxc_list_add(list, head, head->next); } +/* + * Idiom to add an element to the end of an lxc list: + * + * struct lxc_list *tmp = malloc(sizeof(*tmp)); + * if (tmp == NULL) + * return 1; + * lxc_list_add_elem(tmp, elem); + * lxc_list_add_tail(list, tmp); + */ static inline void lxc_list_add_tail(struct lxc_list *head, struct lxc_list *list) { __lxc_list_add(list, head->prev, head); } +/* + * Idiom to free an lxc list: + * + * lxc_list_for_each_safe(iterator, list, list->next) { + * lxc_list_del(iterator); + * free(iterator); + * } + * free(iterator); + */ static inline void lxc_list_del(struct lxc_list *list) { struct lxc_list *next, *prev; @@ -99,9 +155,10 @@ static inline void lxc_list_del(struct lxc_list *list) prev->next = next; } -static inline int lxc_list_len(struct lxc_list *list) +/* Return length of the list. 
*/ +static inline size_t lxc_list_len(struct lxc_list *list) { -int i = 0; +size_t i = 0; struct lxc_list *iter; lxc_list_for_each(iter, list) { i++; -- 2.6.3 ___ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel
[lxc-devel] [lxc/lxc] 988247: Improve the lxc-copy Japanese manpage
Branch: refs/heads/master Home: https://github.com/lxc/lxc Commit: 988247d5add4114b4a8311ff5364f1fbddd8613f https://github.com/lxc/lxc/commit/988247d5add4114b4a8311ff5364f1fbddd8613f Author: Hiroaki Nakamura Date: 2015-12-05 (Sat, 05 Dec 2015) Changed paths: M doc/ja/lxc-copy.sgml.in Log Message: --- Improve the lxc-copy Japanese manpage Signed-off-by: Hiroaki Nakamura Commit: 33ae222cf95b58bc49728b466e31e646b8859ca4 https://github.com/lxc/lxc/commit/33ae222cf95b58bc49728b466e31e646b8859ca4 Author: Stéphane Graber Date: 2015-12-09 (Wed, 09 Dec 2015) Changed paths: M doc/ja/lxc-copy.sgml.in Log Message: --- Merge pull request #710 from hnakamur/improve_lxc_copy_japanese_manpage Improve the lxc-copy Japanese manpage Compare: https://github.com/lxc/lxc/compare/a7c6b8c7b731...33ae222cf95b___ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel
Re: [lxc-devel] [PATCH] fix 'lxc.mount.entry' key when clearing unexpanded config
On Thu, Dec 10, 2015 at 02:12:41AM +, Serge Hallyn wrote: > Closes #712 > > Signed-off-by: Serge Hallyn Acked-by: Stéphane Graber > --- > src/lxc/confile.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/lxc/confile.c b/src/lxc/confile.c > index ce6786c..1185d7d 100644 > --- a/src/lxc/confile.c > +++ b/src/lxc/confile.c > @@ -2533,7 +2533,7 @@ int lxc_clear_config_item(struct lxc_conf *c, const > char *key) > return lxc_clear_config_keepcaps(c); > else if (strncmp(key, "lxc.cgroup", 10) == 0) > return lxc_clear_cgroups(c, key); > - else if (strcmp(key, "lxc.mount.entries") == 0) > + else if (strcmp(key, "lxc.mount.entry") == 0) > return lxc_clear_mount_entries(c); > else if (strcmp(key, "lxc.mount.auto") == 0) > return lxc_clear_automounts(c); > -- > 2.5.0 > > ___ > lxc-devel mailing list > lxc-devel@lists.linuxcontainers.org > http://lists.linuxcontainers.org/listinfo/lxc-devel -- Stéphane Graber Ubuntu developer http://www.ubuntu.com signature.asc Description: Digital signature ___ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel
[lxc-devel] [lxc/lxc] 8164f0: api wrapper: only reset the current config if this...
Branch: refs/heads/master Home: https://github.com/lxc/lxc Commit: 8164f0e253e9c148a3c3c09eec5e0ebe56602805 https://github.com/lxc/lxc/commit/8164f0e253e9c148a3c3c09eec5e0ebe56602805 Author: Tycho Andersen Date: 2015-12-09 (Wed, 09 Dec 2015) Changed paths: M src/lxc/lxccontainer.c Log Message: --- api wrapper: only reset the current config if this call set it Instead of *always* resetting the current_config to null, we should only reset it if this API call set it. This allows nesting of API calls, e.g. c->checkpoint() can pass stuff into criu.c, which can call c->init_pid() and not lose the ability to log stuff afterwards. Signed-off-by: Tycho Andersen Acked-by: Serge E. Hallyn Commit: 6bf5b3da1eb7db1f488e5d13aee592afdde7021c https://github.com/lxc/lxc/commit/6bf5b3da1eb7db1f488e5d13aee592afdde7021c Author: Tycho Andersen Date: 2015-12-09 (Wed, 09 Dec 2015) Changed paths: M src/lxc/criu.h Log Message: --- c/r: bump criu version requirements Since we're relying on 1.8 for the seccomp stuff, let's refuse to use anything lower than that. Signed-off-by: Tycho Andersen Acked-by: Serge E. Hallyn Commit: aef3d51e61d8e65f31201e4a60eb4102ce5b4385 https://github.com/lxc/lxc/commit/aef3d51e61d8e65f31201e4a60eb4102ce5b4385 Author: Tycho Andersen Date: 2015-12-09 (Wed, 09 Dec 2015) Changed paths: M src/lxc/criu.c M src/lxc/criu.h M src/lxc/lxccontainer.c M src/lxc/lxccontainer.h Log Message: --- c/r: add a new ->migrate API call This patch adds a new ->migrate API call with three commands: MIGRATE_DUMP: this is basically just ->checkpoint() MIGRATE_RESTORE: this is just ->restore() MIGRATE_PRE_DUMP: this can be used to invoke criu's pre-dump command on the container. A small addition to the (pre-)dump commands is the ability to specify a previous partial dump directory, so that one can use a pre-dump of a container. Finally, this new API call uses a structure to pass options so that it can be easily extended in the future (e.g. 
to CRIU's --leave-frozen option in the future, for potentially smarter failure handling on restore). v2: remember to flip the return code for legacy ->checkpoint and ->restore calls Signed-off-by: Tycho Andersen Acked-by: Serge E. Hallyn Commit: fa07124900989e1a2e617659093da293b583470f https://github.com/lxc/lxc/commit/fa07124900989e1a2e617659093da293b583470f Author: Tycho Andersen Date: 2015-12-09 (Wed, 09 Dec 2015) Changed paths: M src/lxc/criu.c Log Message: --- c/r: remove random line continuations No idea how these got there, but let's get rid of them since they're weird. Signed-off-by: Tycho Andersen Acked-by: Serge E. Hallyn Commit: cdcae1034cb0e0ead1a3777ad5ce679e2a5883ef https://github.com/lxc/lxc/commit/cdcae1034cb0e0ead1a3777ad5ce679e2a5883ef Author: TAMUKI Shoichi Date: 2015-12-09 (Wed, 09 Dec 2015) Changed paths: M config/init/common/lxc-net.in Log Message: --- Fold dnsmasq command line in lxc-net.in Fold dnsmasq command line at about 80 chars because the line is too long. Signed-off-by: TAMUKI Shoichi Acked-by: Serge E. Hallyn Commit: af2d9fad6e333deabaf8eefc7c6caf9c96a0a280 https://github.com/lxc/lxc/commit/af2d9fad6e333deabaf8eefc7c6caf9c96a0a280 Author: TAMUKI Shoichi Date: 2015-12-09 (Wed, 09 Dec 2015) Changed paths: M configure.ac Log Message: --- Add support for new target plamo to configure.ac Add support for new target plamo to specify the linux distribution. Plamo Linux uses sysvinit. Signed-off-by: KATOH Yasufumi Signed-off-by: TAMUKI Shoichi Acked-by: Serge E. 
Hallyn Commit: ba3004da531583d6ec7f2d8a8c17008b4684c721 https://github.com/lxc/lxc/commit/ba3004da531583d6ec7f2d8a8c17008b4684c721 Author: Sungbae Yoo Date: 2015-12-09 (Wed, 09 Dec 2015) Changed paths: M doc/ko/lxc.container.conf.sgml.in Log Message: --- Add LXC_TARGET env to Korean lxc.container.conf(5) Update for commit c154af9 Signed-off-by: Sungbae Yoo Acked-by: Stéphane Graber Commit: 06078509e389d093b85465e9eef9215928ee51a0 https://github.com/lxc/lxc/commit/06078509e389d093b85465e9eef9215928ee51a0 Author: Tycho Andersen Date: 2015-12-09 (Wed, 09 Dec 2015) Changed paths: M src/lxc/cgfs.c M src/lxc/cgmanager.c M src/lxc/cgroup.c M src/lxc/cgroup.h Log Message: --- cgroup: add cgroup_escape() call We'll use this in the next patch to escape to the root cgroup before we exec criu. v2: s/cgm_connected/cmg_needs_disconnect/g Signed-off-by: Tycho Andersen Acked-by: Serge E. Hallyn Commit: e9195050b4abd0bc4c207457cb2f1f161b583fc8 https://github.com/lxc/lxc/commit/e9195050b4abd0bc4c207457cb2f1f161b583fc8 Author: Tycho Andersen Date: 2015-12-09 (Wed, 09 Dec 2015) Changed paths: M src/lxc/criu.c Log Message:
Re: [lxc-devel] [PATCH] Add LXC_TARGET env to Korean lxc.container.conf(5)
On Fri, Dec 04, 2015 at 09:13:45AM +, Sungbae Yoo wrote: > > Update for commit c154af9 > > Signed-off-by: Sungbae Yoo Acked-by: Stéphane Graber > > diff --git a/doc/ko/lxc.container.conf.sgml.in > b/doc/ko/lxc.container.conf.sgml.in > index 741003b..73b16a2 100644 > --- a/doc/ko/lxc.container.conf.sgml.in > +++ b/doc/ko/lxc.container.conf.sgml.in > @@ -2138,7 +2138,22 @@ mknod errno 0 > > > > - > + > + > + > +LXC_TARGET > + > + > + > + > + stop 훅에서만 사용된다. 값이 "stop"이면 컨테이너가 종료되는 것을, "reboot"이면 컨테이너가 > 재부팅되는 것을 의미한다. > + > + > + > + > > > > -- > 1.9.1 > ___ > lxc-devel mailing list > lxc-devel@lists.linuxcontainers.org > http://lists.linuxcontainers.org/listinfo/lxc-devel -- Stéphane Graber Ubuntu developer http://www.ubuntu.com signature.asc Description: Digital signature ___ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel
[lxc-devel] [PATCH] fix 'lxc.mount.entry' key when clearing unexpanded config
Closes #712 Signed-off-by: Serge Hallyn --- src/lxc/confile.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lxc/confile.c b/src/lxc/confile.c index ce6786c..1185d7d 100644 --- a/src/lxc/confile.c +++ b/src/lxc/confile.c @@ -2533,7 +2533,7 @@ int lxc_clear_config_item(struct lxc_conf *c, const char *key) return lxc_clear_config_keepcaps(c); else if (strncmp(key, "lxc.cgroup", 10) == 0) return lxc_clear_cgroups(c, key); - else if (strcmp(key, "lxc.mount.entries") == 0) + else if (strcmp(key, "lxc.mount.entry") == 0) return lxc_clear_mount_entries(c); else if (strcmp(key, "lxc.mount.auto") == 0) return lxc_clear_automounts(c); -- 2.5.0 ___ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel
Re: [lxc-devel] [PATCH 1/8] kernfs: Add API to generate relative kernfs path
On Wed, Dec 09, 2015 at 05:36:51PM -0500, Tejun Heo wrote: > Hey, > > On Wed, Dec 09, 2015 at 10:13:27PM +, Serge Hallyn wrote: > > we can rename kn_root to from here if you think that's clearer (and > > change the order here as well). > > I think it'd be better for them to be consistent and in the same order > - the target and then the optional base. > > > > Was converting the path functions to return > > > length too much work? If so, that's fine but please explain what > > > decisions were made. > > > > Yes, I had replied saying: > > > > |I can change that, but the callers right now don't re-try with > > |larger buffer anyway, so this would actually complicate them just > > |a smidgeon. Would you want them changed to do that? (pr_cont_kernfs_path > > |right now writes into a static char[] for instance) > > > > I can still make that change if you like. > > Oops, sorry I forgot about that. The reason why kernfs_path() is > written the current way was me being lazy. While I think it'd be > better to make the functions behave like normal string handling > functions if we're extending it, I don't think it's that important. > If it's easy, please go ahead. If not, we can get back to it later > when necessary. > > > > I skimmed through the series and spotted several other review points > > > which didn't get addressed. Can you please go over the previous > > > review cycle and address the review points? > > > > I did go through every email twice, once while making changes (one > > branch per response) and once while making changelog for each patch, > > sorry about whatever I missed. I'll go through each again. > > The other chunk I noticed was inline conversions of internal functions > which didn't seem to belong to the patch. I asked whether those were > stray chunks. Maybe the comment was too buried to notice? Anyways, > that part actually causes conflicts when applying to cgroup/for-4.5. > > There are a couple more things. 
> > * Can you please put the ns related decls after the regular cgroup > stuff in cgroup.h? > > * I think I might need to edit the documentation anyway but it'd be > great if you can make the namespace section more in line with the > rest of the documentation - e.g. s/CGroup/cgroup/ and more > structured sectioning. Ok fwiw I've fixed up the arguments to kernfs_path_from_node, removed the inlines, and moved the ns related decls after the others in cgroup.h (i.e. done the easy stuff) in the 2015-12-09/cgroupns.3 branch of git://git.kernel.org/pub/scm/linux/kernel/git/sergeh/linux-security.git I'll address the rest either after next week or, hopefully, when I get a chance earlier. > At this point, it all generally looks good to me. Let's get the > nits out of the way and merge it. If you wanted to take the branch as is, then I'll do the documentation and pr_cont_kernfs_path() etc rewrite as separate patches, but I'll assume you'd like to at least wait for doc rewrite. -serge ___ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel
Re: [lxc-devel] [PATCH 1/8] kernfs: Add API to generate relative kernfs path
On Wed, Dec 09, 2015 at 05:36:51PM -0500, Tejun Heo wrote: > Hey, > > On Wed, Dec 09, 2015 at 10:13:27PM +, Serge Hallyn wrote: > > we can rename kn_root to from here if you think that's clearer (and > > change the order here as well). > > I think it'd be better for them to be consistent and in the same order > - the target and then the optional base. > > > > Was converting the path functions to return > > > length too much work? If so, that's fine but please explain what > > > decisions were made. > > > > Yes, I had replied saying: > > > > |I can change that, but the callers right now don't re-try with > > |larger buffer anyway, so this would actually complicate them just > > |a smidgeon. Would you want them changed to do that? (pr_cont_kernfs_path > > |right now writes into a static char[] for instance) > > > > I can still make that change if you like. > > Oops, sorry I forgot about that. The reason why kernfs_path() is > written the current way was me being lazy. While I think it'd be > better to make the functions behave like normal string handling > functions if we're extending it, I don't think it's that important. > If it's easy, please go ahead. If not, we can get back to it later > when necessary. Ok - I'm now gone until Dec 21 (and laptopping won't be an option :( ). I'll make the other changes then and do this as well. So pr_cont_kernfs_path() will dynamically allocate a longer buffer (only) if needed. > > > I skimmed through the series and spotted several other review points > > > which didn't get addressed. Can you please go over the previous > > > review cycle and address the review points? > > > > I did go through every email twice, once while making changes (one > > branch per response) and once while making changelog for each patch, > > sorry about whatever I missed. I'll go through each again. > > The other chunk I noticed was inline conversions of internal functions > which didn't seem to belong to the patch. 
I asked whether those were > stray chunks. Maybe the comment was too buried to notice? Anyways, > that part actually causes conflicts when applying to cgroup/for-4.5. Gah. I saw one and removed it. Grep tells me I missed some, will remove them all next time. > There are a couple more things. > > * Can you please put the ns related decls after the regular cgroup > stuff in cgroup.h? ok > * I think I might need to edit the documentation anyway but it'd be > great if you can make the namespace section more in line with the > rest of the documentation - e.g. s/CGroup/cgroup/ and more > structured sectioning. I'll read through it and look for patterns to change. > At this point, it all generally looks good to me. Let's get the > nits out of the way and merge it. > > Thanks. thanks, -serge ___ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel
Re: [lxc-devel] [PATCH 1/8] kernfs: Add API to generate relative kernfs path
Hey, On Wed, Dec 09, 2015 at 10:13:27PM +, Serge Hallyn wrote: > we can rename kn_root to from here if you think that's clearer (and > change the order here as well). I think it'd be better for them to be consistent and in the same order - the target and then the optional base. > > Was converting the path functions to return > > length too much work? If so, that's fine but please explain what > > decisions were made. > > Yes, I had replied saying: > > |I can change that, but the callers right now don't re-try with > |larger buffer anyway, so this would actually complicate them just > |a smidgeon. Would you want them changed to do that? (pr_cont_kernfs_path > |right now writes into a static char[] for instance) > > I can still make that change if you like. Oops, sorry I forgot about that. The reason why kernfs_path() is written the current way was me being lazy. While I think it'd be better to make the functions behave like normal string handling functions if we're extending it, I don't think it's that important. If it's easy, please go ahead. If not, we can get back to it later when necessary. > > I skimmed through the series and spotted several other review points > > which didn't get addressed. Can you please go over the previous > > review cycle and address the review points? > > I did go through every email twice, once while making changes (one > branch per response) and once while making changelog for each patch, > sorry about whatever I missed. I'll go through each again. The other chunk I noticed was inline conversions of internal functions which didn't seem to belong to the patch. I asked whether those were stray chunks. Maybe the comment was too buried to notice? Anyways, that part actually causes conflicts when applying to cgroup/for-4.5. There are a couple more things. * Can you please put the ns related decls after the regular cgroup stuff in cgroup.h? 
* I think I might need to edit the documentation anyway but it'd be great if you can make the namespace section more in line with the rest of the documentation - e.g. s/CGroup/cgroup/ and more structured sectioning. At this point, it all generally looks good to me. Let's get the nits out of the way and merge it. Thanks. -- tejun ___ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel
Re: [lxc-devel] [PATCH 1/8] kernfs: Add API to generate relative kernfs path
Quoting Tejun Heo (t...@kernel.org): > Hello, Serge. > > On Wed, Dec 09, 2015 at 01:28:54PM -0600, serge.hal...@ubuntu.com wrote: > > +/* kernfs_node_depth - compute depth from @from to @to */ > > +static size_t kernfs_depth(struct kernfs_node *from, struct kernfs_node > > *to) > ... > > +char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen) > > +{ > > + return kernfs_path_from_node(NULL, kn, buf, buflen); > > +} > ... > > diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h > > index 5d4e9c4..d025ebd 100644 > > --- a/include/linux/kernfs.h > > +++ b/include/linux/kernfs.h > > @@ -267,6 +267,9 @@ static inline bool kernfs_ns_enabled(struct kernfs_node > > *kn) > > > > int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen); > > size_t kernfs_path_len(struct kernfs_node *kn); > > +char * __must_check kernfs_path_from_node(struct kernfs_node *root_kn, > > + struct kernfs_node *kn, char *buf, > > + size_t buflen); > > I think I commented on the same thing before, but I think it'd make > more sense to put @from after @to Oh. You said that for kernfs_path_from_node_locked(), and those were changed. kernfs_path_from_node() is a different fn, but > and the prototype is using @root_kn > which is a bit confusing. we can rename kn_root to from here if you think that's clearer (and change the order here as well). > Was converting the path functions to return > length too much work? If so, that's fine but please explain what > decisions were made. Yes, I had replied saying: |I can change that, but the callers right now don't re-try with |larger buffer anyway, so this would actually complicate them just |a smidgeon. Would you want them changed to do that? (pr_cont_kernfs_path |right now writes into a static char[] for instance) I can still make that change if you like. > I skimmed through the series and spotted several other review points > which didn't get addressed. 
Can you please go over the previous > review cycle and address the review points? I did go through every email twice, once while making changes (one branch per response) and once while making changelog for each patch, sorry about whatever I missed. I'll go through each again. I'm going to be out for awhile after today, so next version will unfortunately take awhile. thanks, -serge ___ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel
Re: [lxc-devel] [PATCH 1/8] kernfs: Add API to generate relative kernfs path
Hello, Serge. On Wed, Dec 09, 2015 at 01:28:54PM -0600, serge.hal...@ubuntu.com wrote: > +/* kernfs_node_depth - compute depth from @from to @to */ > +static size_t kernfs_depth(struct kernfs_node *from, struct kernfs_node *to) ... > +char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen) > +{ > + return kernfs_path_from_node(NULL, kn, buf, buflen); > +} ... > diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h > index 5d4e9c4..d025ebd 100644 > --- a/include/linux/kernfs.h > +++ b/include/linux/kernfs.h > @@ -267,6 +267,9 @@ static inline bool kernfs_ns_enabled(struct kernfs_node > *kn) > > int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen); > size_t kernfs_path_len(struct kernfs_node *kn); > +char * __must_check kernfs_path_from_node(struct kernfs_node *root_kn, > + struct kernfs_node *kn, char *buf, > + size_t buflen); I think I commented on the same thing before, but I think it'd make more sense to put @from after @to and the prototype is using @root_kn which is a bit confusing. Was converting the path functions to return length too much work? If so, that's fine but please explain what decisions were made. I skimmed through the series and spotted several other review points which didn't get addressed. Can you please go over the previous review cycle and address the review points? Thanks. -- tejun ___ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel
Re: [lxc-devel] [PATCH 1/3] cgroup: add cgroup_escape() call
On Wed, Dec 09, 2015 at 02:58:20AM +, Serge Hallyn wrote: > Quoting Tycho Andersen (tycho.ander...@canonical.com): > > + bool ret = true, cgm_connected = false; > > Sorry, can you rename this disconnect_cgm or cgm_needs_disconnect ? Sure, see attached. Tycho >From c378426d560a8a071656f08e7f27bf5fbe9b17ad Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Mon, 7 Dec 2015 17:07:05 -0700 Subject: [PATCH 1/4] cgroup: add cgroup_escape() call We'll use this in the next patch to escape to the root cgroup before we exec criu. v2: s/cgm_connected/cmg_needs_disconnect/g Signed-off-by: Tycho Andersen Acked-by: Serge E. Hallyn --- src/lxc/cgfs.c | 50 ++ src/lxc/cgmanager.c | 19 --- src/lxc/cgroup.c| 7 +++ src/lxc/cgroup.h| 2 ++ 4 files changed, 75 insertions(+), 3 deletions(-) diff --git a/src/lxc/cgfs.c b/src/lxc/cgfs.c index d65f2d7..94b3d87 100644 --- a/src/lxc/cgfs.c +++ b/src/lxc/cgfs.c @@ -2343,6 +2343,55 @@ static const char *cgfs_canonical_path(void *hdata) return path; } +static bool cgfs_escape(void) +{ + struct cgroup_meta_data *md; + int i; + bool ret = false; + + md = lxc_cgroup_load_meta(); + if (!md) + return false; + + for (i = 1; i <= md->maximum_hierarchy; i++) { + struct cgroup_hierarchy *h = md->hierarchies[i]; + struct cgroup_mount_point *mp; + char *tasks; + FILE *f; + int written; + + if (!h) { + WARN("not escaping hierarchy %d", i); + continue; + } + + mp = lxc_cgroup_find_mount_point(h, "/", true); + if (!mp) + goto out; + + tasks = cgroup_to_absolute_path(mp, "/", "tasks"); + if (!tasks) + goto out; + + f = fopen(tasks, "a"); + free(tasks); + if (!f) + goto out; + + written = fprintf(f, "%d\n", getpid()); + fclose(f); + if (written < 0) { + SYSERROR("writing tasks failed\n"); + goto out; + } + } + + ret = true; +out: + lxc_cgroup_put_meta(md); + return ret; +} + static bool cgfs_unfreeze(void *hdata) { struct cgfs_data *d = hdata; @@ -2408,6 +2457,7 @@ static struct cgroup_ops cgfs_ops = { .create_legacy = cgfs_create_legacy, .get_cgroup = 
cgfs_get_cgroup, .canonical_path = cgfs_canonical_path, + .escape = cgfs_escape, .get = lxc_cgroupfs_get, .set = lxc_cgroupfs_set, .unfreeze = cgfs_unfreeze, diff --git a/src/lxc/cgmanager.c b/src/lxc/cgmanager.c index 05df0da..b82be59 100644 --- a/src/lxc/cgmanager.c +++ b/src/lxc/cgmanager.c @@ -312,13 +312,22 @@ static bool lxc_cgmanager_create(const char *controller, const char *cgroup_path * be in "/lxc/c1" rather than "/user//c1" * called internally with connection already open */ -static bool lxc_cgmanager_escape(void) +static bool cgm_escape(void) { - bool ret = true; + bool ret = true, cgm_needs_disconnect = false; pid_t me = getpid(); char **slist = subsystems; int i; + if (!cgroup_manager) { + if (!cgm_dbus_connect()) { + ERROR("Error connecting to cgroup manager"); + return false; + } + cgm_needs_disconnect = true; + } + + if (cgm_all_controllers_same) slist = subsystems_inone; @@ -335,6 +344,9 @@ static bool lxc_cgmanager_escape(void) } } + if (cgm_needs_disconnect) + cgm_dbus_disconnect(); + return ret; } @@ -1307,7 +1319,7 @@ struct cgroup_ops *cgm_ops_init(void) goto err1; // root; try to escape to root cgroup - if (geteuid() == 0 && !lxc_cgmanager_escape()) + if (geteuid() == 0 && !cgm_escape()) goto err2; cgm_dbus_disconnect(); @@ -1524,6 +1536,7 @@ static struct cgroup_ops cgmanager_ops = { .create_legacy = NULL, .get_cgroup = cgm_get_cgroup, .canonical_path = cgm_canonical_path, + .escape = cgm_escape, .get = cgm_get, .set = cgm_set, .unfreeze = cgm_unfreeze, diff --git a/src/lxc/cgroup.c b/src/lxc/cgroup.c index b1c764f..aeb8a58 100644 --- a/src/lxc/cgroup.c +++ b/src/lxc/cgroup.c @@ -109,6 +109,13 @@ const char *cgroup_get_cgroup(struct lxc_handler *handler, const char *subsystem return NULL; } +bool cgroup_escape(void) +{ + if (ops) + return ops->escape(); + return false; +} + const char *cgroup_canonical_path(struct lxc_handler *handler) { if (geteuid()) { diff --git a/src/lxc/cgroup.h b/src/lxc/cgroup.h index 7704c04..e3d3ce4 100644 --- 
a/src/lxc/cgroup.h +++ b/src/lxc/cgroup.h @@ -47,6 +47,7 @@ struct cgroup_ops { bool (*create_legacy)(void *hdata, pid_t pid); const char *(*get_cgroup)(void *hdata, const char *subsystem); const char *(*canonical_path)(void *hdata); + bool (*escape)(void); int (*set)(const char *filename, const char *value, const char *name, const char *lxcpath); int (*get)(const char *filename, char *value, size_t len, const char *name, const char *lxcpath); bool (*unfreeze)(void *hdata); @@ -71,6 +72,7 @@ extern void cgroup_cleanup(struct lxc_handler *handler); extern bool cgroup_create_legacy(struct lxc_handler *handler); extern int cgroup_nrtasks(struct lxc_handler *handler); extern const char *cgroup_get_cgroup(struct lxc_handler *handler, const char *subsys
[lxc-devel] [PATCH 6/8] cgroup: mount cgroupns-root when inside non-init cgroupns
From: Serge Hallyn This patch enables cgroup mounting inside userns when a process has appropriate privileges. The cgroup filesystem mounted is rooted at the cgroupns-root. Thus, in a container-setup, only the hierarchy under the cgroupns-root is exposed inside the container. This allows container management tools to run inside the containers without depending on any global state. Signed-off-by: Serge Hallyn --- Changelog: 20151116 - Don't allow user namespaces to bind new subsystems 20151118 - postpone the FS_USERNS_MOUNT flag until the last patch, until we can convince ourselves it is safe. 20151207 - Switch to walking up the kernfs path from kn root. - Group initialized variables - Explain the capable(CAP_SYS_ADMIN) check - Style fixes --- kernel/cgroup.c | 40 +++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index f34551a..b92b3fd 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2006,6 +2006,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, { bool is_v2 = fs_type == &cgroup2_fs_type; struct super_block *pinned_sb = NULL; + struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; struct cgroup_subsys *ss; struct cgroup_root *root; struct cgroup_sb_opts opts; @@ -2014,6 +2015,14 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, int i; bool new_sb; + get_cgroup_ns(ns); + + /* Check if the caller has permission to mount. */ + if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) { + put_cgroup_ns(ns); + return ERR_PTR(-EPERM); + } + /* * The first time anyone tries to mount a cgroup, enable the list * linking each css_set to its tasks and fix up all existing tasks. @@ -2129,6 +2138,16 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, goto out_unlock; } + /* +* We know this subsystem has not yet been bound. Users in a non-init +* user namespace may only mount hierarchies with no bound subsystems, +* i.e.
'none,name=user1' +*/ + if (!opts.none && !capable(CAP_SYS_ADMIN)) { + ret = -EPERM; + goto out_unlock; + } + root = kzalloc(sizeof(*root), GFP_KERNEL); if (!root) { ret = -ENOMEM; @@ -2147,12 +2166,30 @@ out_free: kfree(opts.release_agent); kfree(opts.name); - if (ret) + if (ret) { + put_cgroup_ns(ns); return ERR_PTR(ret); + } out_mount: dentry = kernfs_mount(fs_type, flags, root->kf_root, is_v2 ? CGROUP2_SUPER_MAGIC : CGROUP_SUPER_MAGIC, &new_sb); + + /* +* In non-init cgroup namespace, instead of root cgroup's +* dentry, we return the dentry corresponding to the +* cgroupns->root_cgrp. +*/ + if (!IS_ERR(dentry) && ns != &init_cgroup_ns) { + struct dentry *nsdentry; + struct cgroup *cgrp; + + cgrp = cset_cgroup_from_root(ns->root_cset, root); + nsdentry = kernfs_node_dentry(cgrp->kn, dentry->d_sb); + dput(dentry); + dentry = nsdentry; + } + if (IS_ERR(dentry) || !new_sb) cgroup_put(&root->cgrp); @@ -2165,6 +2202,7 @@ out_mount: deactivate_super(pinned_sb); } + put_cgroup_ns(ns); return dentry; } -- 1.7.9.5 ___ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel
[lxc-devel] [PATCH 7/8] cgroup: Add documentation for cgroup namespaces
From: Aditya Kali Signed-off-by: Aditya Kali Signed-off-by: Serge Hallyn --- Changelog (2015-12-08): Merge into Documentation/cgroup.txt --- Documentation/cgroup.txt | 144 ++ 1 file changed, 144 insertions(+) diff --git a/Documentation/cgroup.txt b/Documentation/cgroup.txt index 31d1f7b..ca42df4 100644 --- a/Documentation/cgroup.txt +++ b/Documentation/cgroup.txt @@ -47,6 +47,7 @@ CONTENTS 5-3. IO 5-3-1. IO Interface Files 5-3-2. Writeback +6. Namespaces P. Information on Kernel Programming P-1. Filesystem Support for Writeback D. Deprecated v1 Core Features @@ -1013,6 +1014,149 @@ writeback as follows. vm.dirty[_background]_ratio. +6. CGroup Namespaces + +CGroup Namespace provides a mechanism to virtualize the view of the +/proc/<pid>/cgroup file. The CLONE_NEWCGROUP clone-flag can be used with +clone() and unshare() syscalls to create a new cgroup namespace. +The process running inside the cgroup namespace will have its /proc/<pid>/cgroup +output restricted to cgroupns-root. cgroupns-root is the cgroup of the process +at the time of creation of the cgroup namespace. + +Prior to CGroup Namespace, the /proc/<pid>/cgroup file used to show complete +path of the cgroup of a process. In a container setup (where a set of cgroups +and namespaces are intended to isolate processes), the /proc/<pid>/cgroup file +may leak potential system level information to the isolated processes. + +For Example: + $ cat /proc/self/cgroup + 0:cpuset,cpu,cpuacct,memory,devices,freezer,hugetlb:/batchjobs/container_id1 + +The path '/batchjobs/container_id1' can generally be considered as system-data +and it's desirable to not expose it to the isolated process. + +CGroup Namespaces can be used to restrict visibility of this path.
+For Example: + # Before creating cgroup namespace + $ ls -l /proc/self/ns/cgroup + lrwxrwxrwx 1 root root 0 2014-07-15 10:37 /proc/self/ns/cgroup -> cgroup:[4026531835] + $ cat /proc/self/cgroup + 0:cpuset,cpu,cpuacct,memory,devices,freezer,hugetlb:/batchjobs/container_id1 + + # unshare(CLONE_NEWCGROUP) and exec /bin/bash + $ ~/unshare -c + [ns]$ ls -l /proc/self/ns/cgroup + lrwxrwxrwx 1 root root 0 2014-07-15 10:35 /proc/self/ns/cgroup -> cgroup:[4026532183] + # From within new cgroupns, process sees that it's in the root cgroup + [ns]$ cat /proc/self/cgroup + 0:cpuset,cpu,cpuacct,memory,devices,freezer,hugetlb:/ + + # From global cgroupns: + $ cat /proc/<pid>/cgroup + 0:cpuset,cpu,cpuacct,memory,devices,freezer,hugetlb:/batchjobs/container_id1 + + # Unshare cgroupns along with userns and mountns + # Following calls unshare(CLONE_NEWCGROUP|CLONE_NEWUSER|CLONE_NEWNS), then + # sets up uid/gid map and execs /bin/bash + $ ~/unshare -c -u -m + # Originally, we were in /batchjobs/container_id1 cgroup. Mount our own cgroup + # hierarchy. + [ns]$ mount -t cgroup cgroup /tmp/cgroup + [ns]$ ls -l /tmp/cgroup + total 0 + -r--r--r-- 1 root root 0 2014-10-13 09:32 cgroup.controllers + -r--r--r-- 1 root root 0 2014-10-13 09:32 cgroup.populated + -rw-r--r-- 1 root root 0 2014-10-13 09:25 cgroup.procs + -rw-r--r-- 1 root root 0 2014-10-13 09:32 cgroup.subtree_control + +The cgroupns-root (/batchjobs/container_id1 in above example) becomes the +filesystem root for the namespace specific cgroupfs mount. + +The virtualization of /proc/self/cgroup file combined with restricting +the view of cgroup hierarchy by namespace-private cgroupfs mount +should provide a completely isolated cgroup view inside the container. + +In its current form, the cgroup namespaces patchset provides following +behavior: + +(1) The 'cgroupns-root' for a cgroup namespace is the cgroup in which +the process calling unshare is running. +For ex.
if a process in /batchjobs/container_id1 cgroup calls unshare, +cgroup /batchjobs/container_id1 becomes the cgroupns-root. +For the init_cgroup_ns, this is the real root ('/') cgroup +(identified in code as cgrp_dfl_root.cgrp). + +(2) The cgroupns-root cgroup does not change even if the namespace +creator process later moves to a different cgroup. +$ ~/unshare -c # unshare cgroupns in some cgroup +[ns]$ cat /proc/self/cgroup +0:cpuset,cpu,cpuacct,memory,devices,freezer,hugetlb:/ +[ns]$ mkdir sub_cgrp_1 +[ns]$ echo 0 > sub_cgrp_1/cgroup.procs +[ns]$ cat /proc/self/cgroup +0:cpuset,cpu,cpuacct,memory,devices,freezer,hugetlb:/sub_cgrp_1 + +(3) Each process gets its CGROUPNS specific view of /proc/<pid>/cgroup +(a) Processes running inside the cgroup namespace will be able to see +cgroup paths (in /proc/self/cgroup) only inside their root cgroup +[ns]$ sleep 10 & # From within unshared cgroupns +[1] 7353 +[ns]$ echo 7353 > sub_cgrp_1/cgroup.procs +[ns]$ cat /proc/7353/cgroup +0:cpuset,cpu,cpuacct,memory,devices,freezer,hugetlb:/sub_cgrp_1 + +(b) From global cgroupns, the real cgroup path will be visible: +$ cat /proc/7353/cgroup + 0:cpuset,cpu,cp
[lxc-devel] [PATCH 8/8] Add FS_USERNS_FLAG to cgroup fs
From: Serge Hallyn allowing root in a non-init user namespace to mount it. This should now be safe, because 1. non-init-root cannot mount a previously unbound subsystem 2. the task doing the mount must be privileged with respect to the user namespace owning the cgroup namespace 3. the mounted subsystem will have its current cgroup as the root dentry. the permissions will be unchanged, so tasks will receive no new privilege over the cgroups which they did not have on the original mounts. Signed-off-by: Serge Hallyn --- kernel/cgroup.c |2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index b92b3fd..7d5d7e1 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2231,12 +2231,14 @@ static struct file_system_type cgroup_fs_type = { .name = "cgroup", .mount = cgroup_mount, .kill_sb = cgroup_kill_sb, + .fs_flags = FS_USERNS_MOUNT, }; static struct file_system_type cgroup2_fs_type = { .name = "cgroup2", .mount = cgroup_mount, .kill_sb = cgroup_kill_sb, + .fs_flags = FS_USERNS_MOUNT, }; static char * cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen, -- 1.7.9.5 ___ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel
[lxc-devel] CGroup Namespaces (v7)
Hi, following is a revised set of the CGroup Namespace patchset which Aditya Kali has previously sent. The code can also be found in the cgroupns.v7 branch of https://git.kernel.org/cgit/linux/kernel/git/sergeh/linux-security.git/ To summarize the semantics: 1. CLONE_NEWCGROUP re-uses 0x02000000, which was previously CLONE_STOPPED 2. unsharing a cgroup namespace makes all your current cgroups your new cgroup root. 3. /proc/pid/cgroup always shows cgroup paths relative to the reader's cgroup namespace root. A task outside of your cgroup looks like 8:memory:/../../.. 4. when a task mounts a cgroupfs, the cgroup which shows up as root depends on the mounting task's cgroup namespace. 5. setns to a cgroup namespace switches your cgroup namespace but not your cgroups. With this, using github.com/hallyn/lxc #2015-11-09/cgns (and github.com/hallyn/lxcfs #2015-11-10/cgns) we can start a container in a full proper cgroup namespace, avoiding either cgmanager or lxcfs cgroup bind mounts. This is completely backward compatible and will be completely invisible to any existing cgroup users (except for those running inside a cgroup namespace and looking at /proc/pid/cgroup of tasks outside their namespace.) Changes from V6: 1. Switch to some WARN_ONs to provide stack traces 2. Rename kernfs_node_distance to kernfs_depth 3. Make sure kernfs_common_ancestor() nodes are from same root 4. Split kernfs changes for cgroup_mount into separate patch 5. Rename kernfs_obtain_root to kernfs_node_dentry (And more, see patch changelogs) Changes from V5: 1. To get a root dentry for cgroup namespace mount, walk the path from the kernfs root dentry. Changes from V4: 1. Move the FS_USERNS_MOUNT flag to last patch 2. Rebase onto cgroup/for-4.5 3. Don't allow non-init user namespaces to bind new subsystems when mounting. 4. Address feedback from Tejun (thanks). Specifically, not addressed: . kernfs_obtain_root - walking dentry from kernfs root. (I think that's the only piece) 5.
Dropped unused get_task_cgroup fn/patch. 6. Reworked kernfs_path_from_node_locked() to try to simplify the logic. It now finds a common ancestor, walks from the source to it, then back up to the target. Changes from V3: 1. Rebased onto latest cgroup changes. In particular switch to css_set_lock and ns_common. 2. Support all hierarchies. Changes from V2: 1. Added documentation in Documentation/cgroups/namespace.txt 2. Fixed a bug that caused crash 3. Incorporated some other suggestions from last patchset: - removed use of threadgroup_lock() while creating new cgroupns - use task_lock() instead of rcu_read_lock() while accessing task->nsproxy - optimized setns() to own cgroupns - simplified code around sane-behavior mount option parsing 4. Restored ACKs from Serge Hallyn from v1 on few patches that have not changed since then. Changes from V1: 1. No pinning of processes within cgroupns. Tasks can be freely moved across cgroups even outside of their cgroupns-root. Usual DAC/MAC policies apply as before. 2. Path in /proc/<pid>/cgroup is now always shown and is relative to cgroupns-root. So path can contain '/..' strings depending on cgroupns-root of the reader and cgroup of <pid>. 3. setns() does not require the process to first move under target cgroupns-root. Changes from RFC (V0): 1. setns support for cgroupns 2. 'mount -t cgroup cgroup <mntpt>' from inside a cgroupns now mounts the cgroup hierarchy with cgroupns-root as the filesystem root. 3. writes to cgroup files outside of cgroupns-root are not allowed 4. visibility of /proc/<pid>/cgroup is further restricted by not showing anything if the <pid> is in a sibling cgroupns and its cgroup falls outside your cgroupns-root. ___ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel
[lxc-devel] [PATCH 5/8] kernfs: define kernfs_node_dentry
From: Aditya Kali A new kernfs API is added to look up the dentry for a particular kernfs path. Signed-off-by: Aditya Kali Signed-off-by: Serge E. Hallyn --- Changelog: 20151116 - Don't allow user namespaces to bind new subsystems 20151118 - postpone the FS_USERNS_MOUNT flag until the last patch, until we can convince ourselves it is safe. 20151207 - Switch to walking up the kernfs path from kn root. 20151208 - Split out the kernfs change - Style changes - Switch from pr_crit to WARN_ON - Reorder arguments to kernfs_obtain_root - rename kernfs_obtain_root to kernfs_node_dentry --- fs/kernfs/mount.c | 67 include/linux/kernfs.h |2 ++ 2 files changed, 69 insertions(+) diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 8eaf417..7224296 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "kernfs-internal.h" @@ -62,6 +63,72 @@ struct kernfs_root *kernfs_root_from_sb(struct super_block *sb) return NULL; } +/* + * find the next ancestor in the path down to @child, where @parent was the + * ancestor whose descendant we want to find. + * + * Say the path is /a/b/c/d. @child is d, @parent is NULL. We return the root + * node. If @parent is b, then we return the node for c. + * Passing in d as @parent is not ok.
+ */ +static struct kernfs_node * +find_next_ancestor(struct kernfs_node *child, struct kernfs_node *parent) +{ + if (child == parent) { + pr_crit_once("BUG in find_next_ancestor: called with parent == child"); + return NULL; + } + + while (child->parent != parent) { + if (!child->parent) + return NULL; + child = child->parent; + } + + return child; +} + +/** + * kernfs_node_dentry - get a dentry for the given kernfs_node + * @kn: kernfs_node for which a dentry is needed + * @sb: the kernfs super_block + */ +struct dentry *kernfs_node_dentry(struct kernfs_node *kn, + struct super_block *sb) +{ + struct dentry *dentry; + struct kernfs_node *knparent = NULL; + + BUG_ON(sb->s_op != &kernfs_sops); + + dentry = dget(sb->s_root); + + /* Check if this is the root kernfs_node */ + if (!kn->parent) + return dentry; + + knparent = find_next_ancestor(kn, NULL); + if (WARN_ON(!knparent)) + return ERR_PTR(-EINVAL); + + do { + struct dentry *dtmp; + struct kernfs_node *kntmp; + + if (kn == knparent) + return dentry; + kntmp = find_next_ancestor(kn, knparent); + if (WARN_ON(!kntmp)) + return ERR_PTR(-EINVAL); + dtmp = lookup_one_len(kntmp->name, dentry, strlen(kntmp->name)); + dput(dentry); + if (IS_ERR(dtmp)) + return dtmp; + knparent = kntmp; + dentry = dtmp; + } while (1); +} + static int kernfs_fill_super(struct super_block *sb, unsigned long magic) { struct kernfs_super_info *info = kernfs_info(sb); diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index d025ebd..6eba888 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -284,6 +284,8 @@ struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry); struct kernfs_root *kernfs_root_from_sb(struct super_block *sb); struct inode *kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn); +struct dentry *kernfs_node_dentry(struct kernfs_node *kn, + struct super_block *sb); struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags, void *priv); void 
kernfs_destroy_root(struct kernfs_root *root); -- 1.7.9.5 ___ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel
[lxc-devel] [PATCH 2/8] sched: new clone flag CLONE_NEWCGROUP for cgroup namespace
From: Aditya Kali CLONE_NEWCGROUP will be used to create a new cgroup namespace. Signed-off-by: Aditya Kali Signed-off-by: Serge Hallyn --- include/uapi/linux/sched.h |3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h index cc89dde..5f0fe01 100644 --- a/include/uapi/linux/sched.h +++ b/include/uapi/linux/sched.h @@ -21,8 +21,7 @@ #define CLONE_DETACHED 0x00400000 /* Unused, ignored */ #define CLONE_UNTRACED 0x00800000 /* set if the tracing process can't force CLONE_PTRACE on this clone */ #define CLONE_CHILD_SETTID 0x01000000 /* set the TID in the child */ -/* 0x02000000 was previously the unused CLONE_STOPPED (Start in stopped state) - and is now available for re-use. */ +#define CLONE_NEWCGROUP 0x02000000 /* New cgroup namespace */ #define CLONE_NEWUTS 0x04000000 /* New utsname namespace */ #define CLONE_NEWIPC 0x08000000 /* New ipc namespace */ #define CLONE_NEWUSER 0x10000000 /* New user namespace */ -- 1.7.9.5 ___ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel
[lxc-devel] [PATCH 4/8] cgroup: cgroup namespace setns support
From: Aditya Kali setns on a cgroup namespace is allowed only if the task has CAP_SYS_ADMIN in its current user-namespace and over the user-namespace associated with target cgroupns. No implicit cgroup changes happen when attaching to another cgroupns. It is expected that someone moves the attaching process under the target cgroupns-root. Signed-off-by: Aditya Kali Signed-off-by: Serge E. Hallyn --- kernel/cgroup.c | 24 +--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 92db64c..f34551a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -5904,10 +5904,28 @@ err_out: return ERR_PTR(err); } -static int cgroupns_install(struct nsproxy *nsproxy, void *ns) +static inline struct cgroup_namespace *to_cg_ns(struct ns_common *ns) { - pr_info("setns not supported for cgroup namespace"); - return -EINVAL; + return container_of(ns, struct cgroup_namespace, ns); +} + +static int cgroupns_install(struct nsproxy *nsproxy, struct ns_common *ns) +{ + struct cgroup_namespace *cgroup_ns = to_cg_ns(ns); + + if (!ns_capable(current_user_ns(), CAP_SYS_ADMIN) || + !ns_capable(cgroup_ns->user_ns, CAP_SYS_ADMIN)) + return -EPERM; + + /* Don't need to do anything if we are attaching to our own cgroupns. */ + if (cgroup_ns == nsproxy->cgroup_ns) + return 0; + + get_cgroup_ns(cgroup_ns); + put_cgroup_ns(nsproxy->cgroup_ns); + nsproxy->cgroup_ns = cgroup_ns; + + return 0; } static struct ns_common *cgroupns_get(struct task_struct *task) -- 1.7.9.5 ___ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel
[lxc-devel] [PATCH 1/8] kernfs: Add API to generate relative kernfs path
From: Aditya Kali The new function kernfs_path_from_node() generates and returns kernfs path of a given kernfs_node relative to a given parent kernfs_node. Signed-off-by: Aditya Kali Signed-off-by: Serge E. Hallyn --- Changelog 20151125: - Fully-wing multilinecomments - Rework kernfs_path_from_node_locked() logic - Replace BUG_ONs with returning NULL - Use a const char* for /.. and precalculate its size Changelog 20151130: - Update kernfs_path_from_node_locked comment Changelog 20151208: - kernfs_node_distance: * Remove BUG_ON(NULL)s * Rename kernfs_node_distance to kernfs_depth - kernfs_common-ancestor: * Remove useless checks for depth == 0 * Add check to ensure nodes are from same root - kernfs_path_from_node_locked: * Remove needless __must_check * Put p;len on its own decl line. * Fix wrong WARN_ONCE usage --- fs/kernfs/dir.c| 177 include/linux/kernfs.h |3 + 2 files changed, 153 insertions(+), 27 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 91e0045..d1a001a 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -44,28 +44,129 @@ static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen) return strlcpy(buf, kn->parent ? 
kn->name : "/", buflen); } -static char * __must_check kernfs_path_locked(struct kernfs_node *kn, char *buf, - size_t buflen) +/* kernfs_node_depth - compute depth from @from to @to */ +static size_t kernfs_depth(struct kernfs_node *from, struct kernfs_node *to) { - char *p = buf + buflen; - int len; + size_t depth = 0; - *--p = '\0'; + while (to->parent && to != from) { + depth++; + to = to->parent; + } + return depth; +} - do { - len = strlen(kn->name); - if (p - buf < len + 1) { - buf[0] = '\0'; - p = NULL; - break; - } - p -= len; - memcpy(p, kn->name, len); - *--p = '/'; - kn = kn->parent; - } while (kn && kn->parent); +static struct kernfs_node *kernfs_common_ancestor(struct kernfs_node *a, + struct kernfs_node *b) +{ + size_t da, db; + struct kernfs_root *ra = kernfs_root(a), *rb = kernfs_root(b); - return p; + if (ra != rb) + return NULL; + + da = kernfs_depth(ra->kn, a); + db = kernfs_depth(rb->kn, b); + + while (da > db) { + a = a->parent; + da--; + } + while (db > da) { + b = b->parent; + db--; + } + + /* worst case b and a will be the same at root */ + while (b != a) { + b = b->parent; + a = a->parent; + } + + return a; +} + +/** + * kernfs_path_from_node_locked - find a pseudo-absolute path to @kn_to, + * where kn_from is treated as root of the path. + * @kn_from: kernfs node which should be treated as root for the path + * @kn_to: kernfs node to which path is needed + * @buf: buffer to copy the path into + * @buflen: size of @buf + * + * We need to handle couple of scenarios here: + * [1] when @kn_from is an ancestor of @kn_to at some level + * kn_from: /n1/n2/n3 + * kn_to: /n1/n2/n3/n4/n5 + * result: /n4/n5 + * + * [2] when @kn_from is on a different hierarchy and we need to find common + * ancestor between @kn_from and @kn_to. + * kn_from: /n1/n2/n3/n4 + * kn_to: /n1/n2/n5 + * result: /../../n5 + * OR + * kn_from: /n1/n2/n3/n4/n5 [depth=5] + * kn_to: /n1/n2/n3 [depth=3] + * result: /../.. 
+ */ +static char * +kernfs_path_from_node_locked(struct kernfs_node *kn_to, +struct kernfs_node *kn_from, char *buf, +size_t buflen) +{ + char *p = buf; + struct kernfs_node *kn, *common; + const char parent_str[] = "/.."; + int i; + size_t depth_from, depth_to, len = 0, nlen = 0; + size_t plen = sizeof(parent_str) - 1; + + /* We atleast need 2 bytes to write "/\0". */ + if (buflen < 2) + return NULL; + + if (!kn_from) + kn_from = kernfs_root(kn_to)->kn; + + if (kn_from == kn_to) { + *p = '/'; + *(++p) = '\0'; + return buf; + } + + common = kernfs_common_ancestor(kn_from, kn_to); + if (WARN_ON(!common)) + return NULL; + + depth_to = kernfs_depth(common, kn_to); + depth_from = kernfs_depth(common, kn_from); + + for (i = 0; i < depth_from; i++) { + if (len + plen + 1 > buflen) + return NULL; + strcpy(p, parent_str); + p += plen; + len += plen; + } + + /* Calculate how many bytes we need for the rest
[lxc-devel] [PATCH 3/8] cgroup: introduce cgroup namespaces
From: Aditya Kali Introduce the ability to create new cgroup namespace. The newly created cgroup namespace remembers the cgroup of the process at the point of creation of the cgroup namespace (referred as cgroupns-root). The main purpose of cgroup namespace is to virtualize the contents of /proc/self/cgroup file. Processes inside a cgroup namespace are only able to see paths relative to their namespace root (unless they are moved outside of their cgroupns-root, at which point they will see a relative path from their cgroupns-root). For a correctly setup container this enables container-tools (like libcontainer, lxc, lmctfy, etc.) to create completely virtualized containers without leaking system level cgroup hierarchy to the task. This patch only implements the 'unshare' part of the cgroupns. Signed-off-by: Aditya Kali Signed-off-by: Serge Hallyn --- Changelog: 2015-11-24 - move cgroup_namespace.c into cgroup.c (and .h) - reformatting - make get_cgroup_ns return void - rename ns->root_cgrps to root_cset. Changelog: 2015-12-08 - Move init_cgroup_ns to other variable declarations - Remove accidental conversion of put-css_set to inline - Drop BUG_ON(NULL) - Remove unneeded pre declaration of struct cgroupns_operations. 
- cgroup.h: collect common ns declerations --- fs/proc/namespaces.c|3 + include/linux/cgroup.h | 54 -- include/linux/nsproxy.h |2 + include/linux/proc_ns.h |4 ++ kernel/cgroup.c | 146 ++- kernel/fork.c |2 +- kernel/nsproxy.c| 21 ++- 7 files changed, 220 insertions(+), 12 deletions(-) diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index f6e8354..bd61075 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -28,6 +28,9 @@ static const struct proc_ns_operations *ns_entries[] = { &userns_operations, #endif &mntns_operations, +#ifdef CONFIG_CGROUPS + &cgroupns_operations, +#endif }; static const char *proc_ns_follow_link(struct dentry *dentry, void **cookie) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 2b3e2314..906e348 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -17,9 +17,57 @@ #include #include #include +#include +#include +#include +#include +#include #include +struct cgroup_namespace { + atomic_tcount; + struct ns_commonns; + struct user_namespace *user_ns; + struct css_set *root_cset; +}; + +extern struct cgroup_namespace init_cgroup_ns; + +#ifdef CONFIG_CGROUPS + +void free_cgroup_ns(struct cgroup_namespace *ns); + +struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, + struct user_namespace *user_ns, + struct cgroup_namespace *old_ns); + +char * cgroup_path(struct cgroup *cgrp, char *buf, size_t buflen); + +#else /* !CONFIG_CGROUPS */ + +static inline void free_cgroup_ns(struct cgroup_namespace *ns) { } +static inline struct cgroup_namespace * +copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns, + struct cgroup_namespace *old_ns) +{ + return old_ns; +} + +#endif /* !CONFIG_CGROUPS */ + +static inline void get_cgroup_ns(struct cgroup_namespace *ns) +{ + if (ns) + atomic_inc(&ns->count); +} + +static inline void put_cgroup_ns(struct cgroup_namespace *ns) +{ + if (ns && atomic_dec_and_test(&ns->count)) + free_cgroup_ns(ns); +} + #ifdef CONFIG_CGROUPS /* @@ -509,12 +557,6 @@ 
static inline int cgroup_name(struct cgroup *cgrp, char *buf, size_t buflen) return kernfs_name(cgrp->kn, buf, buflen); } -static inline char * __must_check cgroup_path(struct cgroup *cgrp, char *buf, - size_t buflen) -{ - return kernfs_path(cgrp->kn, buf, buflen); -} - static inline void pr_cont_cgroup_name(struct cgroup *cgrp) { pr_cont_kernfs_name(cgrp->kn); diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index 35fa08f..ac0d65b 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -8,6 +8,7 @@ struct mnt_namespace; struct uts_namespace; struct ipc_namespace; struct pid_namespace; +struct cgroup_namespace; struct fs_struct; /* @@ -33,6 +34,7 @@ struct nsproxy { struct mnt_namespace *mnt_ns; struct pid_namespace *pid_ns_for_children; struct net *net_ns; + struct cgroup_namespace *cgroup_ns; }; extern struct nsproxy init_nsproxy; diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 42dfc61..de0e771 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -9,6 +9,8 @@ struct pid_namespace; struct nsproxy; struct path; +struct task_struct; +struct inode; struct proc_ns_operations { const char *name; @@ -24,6 +26,7 @@ extern const struct proc_ns_operations i
Re: [lxc-devel] [PATCH 5/7] cgroup: mount cgroupns-root when inside non-init cgroupns
Hello, Serge. On Tue, Dec 08, 2015 at 05:21:24PM -0600, Serge E. Hallyn wrote: > > Heh, is kernfs_obtain_root() the right name? Maybe > > kernfs_node_to_inode()? > > kernfs_node_to_dentry? > > This would presumably make the question of whether to pass in a namespace > moot? Sounds good. Thanks. -- tejun ___ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel