On Fri, Apr 03, 2015 at 04:41:01PM +0000, Serge Hallyn wrote: > Quoting Tycho Andersen (tycho.ander...@canonical.com): > > Previously, lxcapi_restore used the calling process as the lxc monitor > > process > > (and just never returned), requiring users to fork before calling it. This, > > of > > course, would cause problems for things like LXD, which can't fork. > > > > Now, restore() forks the monitor as a child of the process that calls it. > > Users > > who want to daemonize the restore process need to fork themselves. > > lxc-checkpoint has been updated to reflect this behavior change. > > > > Signed-off-by: Tycho Andersen <tycho.ander...@canonical.com> > > Thanks, Tycho. I'm ok with it as is, > > Acked-by: Serge E. Hallyn <serge.hal...@ubuntu.com> > > but there's one thing that could stand improvement (with a later > patch). The lxcapi_restore() will hang on many errors in the > restoring task bc you don't always send a failure status back over the > socket. Yo ucoudl either alway ssend a failure status, or you could > probably just select() before the read, though you'd have to guess at a > decent timeout.
Good point. I think always sending a failure status sounds good; I'll send a patch for that soon. Thanks, Tycho > > --- > > src/lxc/lxc_checkpoint.c | 48 +++++++++++++------ > > src/lxc/lxccontainer.c | 121 > > +++++++++++++++++++++++++++++++++++------------ > > 2 files changed, 125 insertions(+), 44 deletions(-) > > > > diff --git a/src/lxc/lxc_checkpoint.c b/src/lxc/lxc_checkpoint.c > > index cfa08fc..2e76c2e 100644 > > --- a/src/lxc/lxc_checkpoint.c > > +++ b/src/lxc/lxc_checkpoint.c > > @@ -20,6 +20,8 @@ > > #include <stdio.h> > > #include <errno.h> > > #include <unistd.h> > > +#include <sys/types.h> > > +#include <sys/wait.h> > > > > #include <lxc/lxccontainer.h> > > > > @@ -27,6 +29,7 @@ > > #include "config.h" > > #include "lxc.h" > > #include "arguments.h" > > +#include "utils.h" > > > > static char *checkpoint_dir = NULL; > > static bool stop = false; > > @@ -139,36 +142,53 @@ bool checkpoint(struct lxc_container *c) > > return true; > > } > > > > -bool restore(struct lxc_container *c) > > +bool restore_finalize(struct lxc_container *c) > > { > > - pid_t pid = 0; > > - bool ret = true; > > + bool ret = c->restore(c, checkpoint_dir, verbose); > > + if (!ret) { > > + fprintf(stderr, "Restoring %s failed.\n", my_args.name); > > + } > > > > + lxc_container_put(c); > > + return ret; > > +} > > + > > +bool restore(struct lxc_container *c) > > +{ > > if (c->is_running(c)) { > > fprintf(stderr, "%s is running, not restoring.\n", > > my_args.name); > > lxc_container_put(c); > > return false; > > } > > > > - if (my_args.daemonize) > > + if (my_args.daemonize) { > > + pid_t pid; > > + > > pid = fork(); > > + if (pid < 0) { > > + perror("fork"); > > + return false; > > + } > > > > - if (pid == 0) { > > - if (my_args.daemonize) { > > + if (pid == 0) { > > close(0); > > close(1); > > - } > > > > - ret = c->restore(c, checkpoint_dir, verbose); > > - > > - if (!ret) { > > - fprintf(stderr, "Restoring %s failed.\n", my_args.name); > > + exit(!restore_finalize(c)); > > + } else { > > + return wait_for_pid(pid) == 0; > > } > > - } > > + } else { > > + int status; > > > > - lxc_container_put(c); > > + if (!restore_finalize(c)) > > + return false; > > > > - return ret; > > + if (waitpid(-1, &status, 0) < 0) > > + return false; > > + > > + return WIFEXITED(status) && WEXITSTATUS(status) == 0; > > + } > > } > > > > int main(int argc, char *argv[]) > > diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c > > index 4422f4a..c3369cd 100644 > > --- a/src/lxc/lxccontainer.c > > +++ b/src/lxc/lxccontainer.c > > @@ -3853,28 +3853,20 @@ out_unlock: > > return !has_error; > > } > > > > -static bool lxcapi_restore(struct lxc_container *c, char *directory, bool > > verbose) > > +// do_restore never returns, the calling process is used as the > > +// monitor process. do_restore calls exit() if it fails. > > +static void do_restore(struct lxc_container *c, int pipe, char *directory, > > bool verbose) > > { > > pid_t pid; > > - struct lxc_rootfs *rootfs; > > char pidfile[L_tmpnam]; > > struct lxc_handler *handler; > > - bool has_error = true; > > - > > - if (!criu_ok(c)) > > - return false; > > - > > - if (geteuid()) { > > - ERROR("Must be root to restore\n"); > > - return false; > > - } > > > > if (!tmpnam(pidfile)) > > - return false; > > + exit(1); > > > > handler = lxc_init(c->name, c->lxc_conf, c->config_path); > > if (!handler) > > - return false; > > + exit(1); > > > > if (!cgroup_init(handler)) { > > ERROR("failed initing cgroups"); > > @@ -3897,9 +3889,10 @@ static bool lxcapi_restore(struct lxc_container *c, > > char *directory, bool verbos > > > > if (pid == 0) { > > struct criu_opts os; > > + struct lxc_rootfs *rootfs; > > > > if (unshare(CLONE_NEWNS)) > > - exit(1); > > + goto out_fini_handler; > > > > /* CRIU needs the lxc root bind mounted so that it is the root > > of some > > * mount. */ > > @@ -3907,15 +3900,14 @@ static bool lxcapi_restore(struct lxc_container *c, > > char *directory, bool verbos > > > > if (rootfs_is_blockdev(c->lxc_conf)) { > > if (do_rootfs_setup(c->lxc_conf, c->name, > > c->config_path) < 0) > > - exit(1); > > - } > > - else { > > + goto out_fini_handler; > > + } else { > > if (mkdir(rootfs->mount, 0755) < 0 && errno != EEXIST) > > - exit(1); > > + goto out_fini_handler; > > > > if (mount(rootfs->path, rootfs->mount, NULL, MS_BIND, > > NULL) < 0) { > > rmdir(rootfs->mount); > > - exit(1); > > + goto out_fini_handler; > > } > > } > > > > @@ -3930,22 +3922,30 @@ static bool lxcapi_restore(struct lxc_container *c, > > char *directory, bool verbos > > exec_criu(&os); > > umount(rootfs->mount); > > rmdir(rootfs->mount); > > - exit(1); > > + goto out_fini_handler; > > } else { > > - int status; > > + int status, ret; > > + char title[2048]; > > > > pid_t w = waitpid(pid, &status, 0); > > - > > if (w == -1) { > > perror("waitpid"); > > goto out_fini_handler; > > } > > > > + ret = write(pipe, &status, sizeof(status)); > > + close(pipe); > > + > > + if (sizeof(status) != ret) { > > + perror("write"); > > + ERROR("failed to write all of status"); > > + goto out_fini_handler; > > + } > > + > > if (WIFEXITED(status)) { > > if (WEXITSTATUS(status)) { > > goto out_fini_handler; > > - } > > - else { > > + } else { > > int ret; > > FILE *f = fopen(pidfile, "r"); > > if (!f) { > > @@ -3969,17 +3969,78 @@ static bool lxcapi_restore(struct lxc_container *c, > > char *directory, bool verbos > > goto out_fini_handler; > > } > > > > - if (lxc_poll(c->name, handler)) { > > + /* > > + * See comment in lxcapi_start; we don't care if these > > + * fail because it's just a beauty thing. We just > > + * assign the return here to silence potential. > > + */ > > + ret = snprintf(title, sizeof(title), "[lxc monitor] %s %s", > > c->config_path, c->name); > > + ret = setproctitle(title); > > + > > + ret = lxc_poll(c->name, handler); > > + if (ret) > > lxc_abort(c->name, handler); > > - goto out_fini_handler; > > - } > > + lxc_fini(c->name, handler); > > + exit(ret); > > } > > > > - has_error = false; > > - > > out_fini_handler: > > lxc_fini(c->name, handler); > > - return !has_error; > > + exit(1); > > +} > > + > > +static bool lxcapi_restore(struct lxc_container *c, char *directory, bool > > verbose) > > +{ > > + pid_t pid; > > + int status, nread; > > + int pipefd[2]; > > + > > + if (!criu_ok(c)) > > + return false; > > + > > + if (geteuid()) { > > + ERROR("Must be root to restore\n"); > > + return false; > > + } > > + > > + if (pipe(pipefd)) { > > + ERROR("failed to create pipe"); > > + return false; > > + } > > + > > + pid = fork(); > > + if (pid < 0) { > > + close(pipefd[0]); > > + close(pipefd[1]); > > + return false; > > + } > > + > > + if (pid == 0) { > > + close(pipefd[0]); > > + // this never returns > > + do_restore(c, pipefd[1], directory, verbose); > > + } > > + > > + close(pipefd[1]); > > + > > + nread = read(pipefd[0], &status, sizeof(status)); > > + close(pipefd[0]); > > + if (sizeof(status) != nread) { > > + ERROR("reading status from pipe failed"); > > + goto err_wait; > > + } > > + > > + // If the criu process was killed or exited nonzero, wait() for the > > + // handler, since the restore process died. Otherwise, we don't need to > > + // wait, since the child becomes the monitor process. > > + if (!WIFEXITED(status) || WEXITSTATUS(status)) > > + goto err_wait; > > + return true; > > + > > +err_wait: > > + if (wait_for_pid(pid)) > > + ERROR("restore process died"); > > + return false; > > } > > > > static int lxcapi_attach_run_waitl(struct lxc_container *c, > > lxc_attach_options_t *options, const char *program, const char *arg, ...) > > -- > > 2.1.0 > > > > _______________________________________________ > > lxc-devel mailing list > > lxc-devel@lists.linuxcontainers.org > > http://lists.linuxcontainers.org/listinfo/lxc-devel > _______________________________________________ > lxc-devel mailing list > lxc-devel@lists.linuxcontainers.org > http://lists.linuxcontainers.org/listinfo/lxc-devel _______________________________________________ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel