Quoting Tycho Andersen (tycho.ander...@canonical.com):
> CRIU now supports autodetection of external mounts via the --ext-mount-map 
> auto
> --enable-external-sharing --enable-external-masters options, so we don't need
> to explicitly pass the cgmanager mount or any of the mounts from the config.
> This also means that lxcfs mounts (since they are bind mounts from outside the
> container) are autodetected, meaning that c/r of containers using lxcfs works.
> 
> A further advantage of this patch is that it addresses some of the ugliness
> that was in the exec_criu() function. There are other criu options that will
> allow us to trim this even further, though.
> 
> Finally, with --enable-external-masters, criu understands slave mounts in the
> container with shared mounts in the peer group that are outside the namespace.
> This allows containers on a systemd host to be dumped and restored correctly.
> 
> However, these options have just landed in criu trunk today, and the next
> tagged release will be 1.6 on June 1, so we should avoid merging this into any

Ouch, June 1, that's a ways away.

> stable releases until then.

Should there be a lxc_check_criu_version() fn, updated in cases like these,
which ensures that criu is new enough version for the lxc version?

Given the heavy criu development I do NOT think we should fallback to
different behavior on older versions - just require the minversion we
need and fail with a nice informational message otherwise.

> v2: remount / as private before bind mounting the container's directory for

It's probably a whacky enough case that lxc+criu would fail anyway, but
it *is* possible for / to not be a mountpoint.  Init could chroot you
instead of pivot-rooting.  In that case you'd have to bind-mount / onto
itself before doing the MS_PRIVATE remount.

I'm also kind of surprised it worked without a MS_REMOUNT flag, but <shrug>

>     criu. The problem here is that if / is mounted as shared, even if we
>     unshare() the /var/lib/lxc/rootfs mountpoint propagates outside of our
>     mount namespace, which is bad, since we don't want to leak mounts. In
>     particular, this leak confuses criu the second time it goes to checkpoint
>     the container.
> 
> Signed-off-by: Tycho Andersen <tycho.ander...@canonical.com>

Acked-by: Serge E. Hallyn <serge.hal...@ubuntu.com>

But where do we keep this?  We need a github cronjob to post merge request
on a given date.

> ---
>  src/lxc/lxccontainer.c | 89 
> ++++++++++++--------------------------------------
>  1 file changed, 20 insertions(+), 69 deletions(-)
> 
> diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c
> index 3c3ff33..d759703 100644
> --- a/src/lxc/lxccontainer.c
> +++ b/src/lxc/lxccontainer.c
> @@ -3708,18 +3708,18 @@ struct criu_opts {
>  static void exec_criu(struct criu_opts *opts)
>  {
>       char **argv, log[PATH_MAX];
> -     int static_args = 14, argc = 0, i, ret;
> +     int static_args = 18, argc = 0, i, ret;
>       int netnr = 0;
>       struct lxc_list *it;
>  
> -     struct mntent mntent;
>       char buf[4096];
>       FILE *mnts = NULL;
>  
>       /* The command line always looks like:
>        * criu $(action) --tcp-established --file-locks --link-remap 
> --force-irmap \
>        * --manage-cgroups action-script foo.sh -D $(directory) \
> -      * -o $(directory)/$(action).log
> +      * -o $(directory)/$(action).log --ext-mount-map auto
> +      * --enable-external-sharing --enable-external-masters
>        * +1 for final NULL */
>  
>       if (strcmp(opts->action, "dump") == 0) {
> @@ -3746,11 +3746,6 @@ static void exec_criu(struct criu_opts *opts)
>               return;
>       }
>  
> -     // We need to tell criu where cgmanager's socket is bind mounted from
> -     // if it exists since it's external.
> -     if (cgroup_driver() == CGMANAGER)
> -             static_args+=2;
> -
>       argv = malloc(static_args * sizeof(*argv));
>       if (!argv)
>               return;
> @@ -3780,6 +3775,10 @@ static void exec_criu(struct criu_opts *opts)
>       DECLARE_ARG("--link-remap");
>       DECLARE_ARG("--force-irmap");
>       DECLARE_ARG("--manage-cgroups");
> +     DECLARE_ARG("--ext-mount-map");
> +     DECLARE_ARG("auto");
> +     DECLARE_ARG("--enable-external-sharing");
> +     DECLARE_ARG("--enable-external-masters");
>       DECLARE_ARG("--action-script");
>       DECLARE_ARG(DATADIR "/lxc/lxc-restore-net");
>       DECLARE_ARG("-D");
> @@ -3790,35 +3789,9 @@ static void exec_criu(struct criu_opts *opts)
>       if (opts->verbose)
>               DECLARE_ARG("-vvvvvv");
>  
> -     /*
> -      * Note: this macro is not intended to be called unless argc is equal
> -      * to the length of the array; there is nothing that keeps track of the
> -      * length of the array besides the location in the code that this is
> -      * called. (Yes this is bad, and we should fix it.)
> -      */
> -#define RESIZE_ARGS(additional)                                              
> \
> -     do {                                                                    
> \
> -             void *m;                                                        
> \
> -             if (additional < 0) {                                           
> \
> -                     ERROR("resizing by negative amount");                   
> \
> -                     goto err;                                               
> \
> -             } else if (additional == 0)                                     
> \
> -                     continue;                                               
> \
> -                                                                             
> \
> -             m = realloc(argv, (argc + additional + 1) * sizeof(*argv));     
> \
> -             if (!m)                                                         
> \
> -                     goto err;                                               
> \
> -             argv = m;                                                       
> \
> -     } while (0)
> -
>       if (strcmp(opts->action, "dump") == 0) {
>               char pid[32];
>  
> -             if (cgroup_driver() == CGMANAGER) {
> -                     DECLARE_ARG("--ext-mount-map");
> -                     DECLARE_ARG("/sys/fs/cgroup/cgmanager:cgmanager");
> -             }
> -
>               if (sprintf(pid, "%d", lxcapi_init_pid(opts->c)) < 0)
>                       goto err;
>  
> @@ -3827,11 +3800,8 @@ static void exec_criu(struct criu_opts *opts)
>               if (!opts->stop)
>                       DECLARE_ARG("--leave-running");
>       } else if (strcmp(opts->action, "restore") == 0) {
> -
> -             if (cgroup_driver() == CGMANAGER) {
> -                     DECLARE_ARG("--ext-mount-map");
> -                     DECLARE_ARG("cgmanager:/sys/fs/cgroup/cgmanager");
> -             }
> +             void *m;
> +             int additional;
>  
>               DECLARE_ARG("--root");
>               DECLARE_ARG(opts->c->lxc_conf->rootfs.mount);
> @@ -3842,7 +3812,12 @@ static void exec_criu(struct criu_opts *opts)
>               DECLARE_ARG("--cgroup-root");
>               DECLARE_ARG(opts->cgroup_path);
>  
> -             RESIZE_ARGS(lxc_list_len(&opts->c->lxc_conf->network) * 2);
> +             additional = lxc_list_len(&opts->c->lxc_conf->network) * 2;
> +
> +             m = realloc(argv, (argc + additional + 1) * sizeof(*argv));     
> \
> +             if (!m)                                                         
> \
> +                     goto err;                                               
> \
> +             argv = m;
>  
>               lxc_list_for_each(it, &opts->c->lxc_conf->network) {
>                       char eth[128], *veth;
> @@ -3864,37 +3839,8 @@ static void exec_criu(struct criu_opts *opts)
>                       DECLARE_ARG("--veth-pair");
>                       DECLARE_ARG(buf);
>               }
> -     }
> -
> -     // CRIU wants to know about any external bind mounts the
> -     // container has.
> -     mnts = write_mount_file(&opts->c->lxc_conf->mount_list);
> -     if (!mnts)
> -             goto err;
> -
> -     RESIZE_ARGS(lxc_list_len(&opts->c->lxc_conf->mount_list) * 2);
> -
> -     while (getmntent_r(mnts, &mntent, buf, sizeof(buf))) {
> -             char arg[2048], *key, *val;
> -             int ret;
> -
> -             if (strcmp(opts->action, "dump") == 0) {
> -                     key = mntent.mnt_fsname;
> -                     val = mntent.mnt_dir;
> -             } else {
> -                     key = mntent.mnt_dir;
> -                     val = mntent.mnt_fsname;
> -             }
> -
> -             ret = snprintf(arg, sizeof(arg), "%s:%s", key, val);
> -             if (ret < 0 || ret >= sizeof(arg)) {
> -                     goto err;
> -             }
>  
> -             DECLARE_ARG("--ext-mount-map");
> -             DECLARE_ARG(arg);
>       }
> -     fclose(mnts);
>  
>       argv[argc] = NULL;
>  
> @@ -4176,6 +4122,11 @@ static void do_restore(struct lxc_container *c, int 
> pipe, char *directory, bool
>                       if (mkdir(rootfs->mount, 0755) < 0 && errno != EEXIST)
>                               goto out_fini_handler;
>  
> +                     if (mount(NULL, "/", NULL, MS_PRIVATE, NULL) < 0) {
> +                             SYSERROR("remount / to private failed");
> +                             goto out_fini_handler;
> +                     }
> +
>                       if (mount(rootfs->path, rootfs->mount, NULL, MS_BIND, 
> NULL) < 0) {
>                               rmdir(rootfs->mount);
>                               goto out_fini_handler;
> -- 
> 2.1.4
> 
> _______________________________________________
> lxc-devel mailing list
> lxc-devel@lists.linuxcontainers.org
> http://lists.linuxcontainers.org/listinfo/lxc-devel
_______________________________________________
lxc-devel mailing list
lxc-devel@lists.linuxcontainers.org
http://lists.linuxcontainers.org/listinfo/lxc-devel

Reply via email to