Hello! As discussed on IRC, my initial advice about MS_PRIVATE was misguided. The real issue is the “rm_rf (new_root);” call, which removes the root directory and thus leaves child processes (the daemon) with nothing.
The attached patch adds a test loosely based on yours, along with a fix for that issue. The fix (for the “userns” engine) is to make NEW_ROOT a tmpfs, so that upon completion all we need to do is unmount it and remove the now-empty directory; the tmpfs itself lives on as the root file system of child processes. In the “fakechroot” case, we have to leave NEW_ROOT behind when child processes are still running, which is not great but acceptable (it’s user-owned, has mode #o700, and is under /tmp). The test only checks the “userns” engine. If you confirm that it works for you and looks reasonable, we can apply it. Thanks, Ludo’.
diff --git a/gnu/packages/aux-files/run-in-namespace.c b/gnu/packages/aux-files/run-in-namespace.c index 52a16a5362..1d64ef9f44 100644 --- a/gnu/packages/aux-files/run-in-namespace.c +++ b/gnu/packages/aux-files/run-in-namespace.c @@ -41,6 +41,7 @@ #include <fcntl.h> #include <dirent.h> #include <sys/syscall.h> +#include <sys/prctl.h> /* Whether we're building the ld.so/libfakechroot wrapper. */ #define HAVE_EXEC_WITH_LOADER \ @@ -258,11 +259,20 @@ exec_in_user_namespace (const char *store, int argc, char *argv[]) { /* Spawn @WRAPPED_PROGRAM@ in a separate namespace where STORE is bind-mounted in the right place. */ - int err; + int err, is_tmpfs; char *new_root = mkdtemp (strdup ("/tmp/guix-exec-XXXXXX")); char *new_store = concat (new_root, original_store); char *cwd = get_current_dir_name (); + /* Become the new parent of grand-children when their parent dies. */ + prctl (PR_SET_CHILD_SUBREAPER, 1); + + /* Optionally, make NEW_ROOT a tmpfs. That way, if we have to leave it + behind because there are sub-processes still running when this wrapper + exits, it's OK. */ + err = mount ("none", new_root, "tmpfs", 0, NULL); + is_tmpfs = (err == 0); + /* Create a child with separate namespaces and set up bind-mounts from there. That way, bind-mounts automatically disappear when the child exits, which simplifies cleanup for the parent. Note: clone is more @@ -300,6 +310,7 @@ exec_in_user_namespace (const char *store, int argc, char *argv[]) /* Failure: user namespaces not supported. 
*/ fprintf (stderr, "%s: error: 'clone' failed: %m\n", argv[0]); rm_rf (new_root); + free (new_root); break; default: @@ -312,10 +323,27 @@ exec_in_user_namespace (const char *store, int argc, char *argv[]) write_id_map (child, "uid_map", getuid ()); write_id_map (child, "gid_map", getgid ()); - int status; + int status, status_other; waitpid (child, &status, 0); - chdir ("/"); /* avoid EBUSY */ - rm_rf (new_root); + + if (is_tmpfs) + { + /* NEW_ROOT lives on in child processes and we no longer need it + to exist as an empty directory in the global namespace. */ + umount (new_root); + rmdir (new_root); + } + /* Check whether there are child processes left. If there are none, + we can remove NEW_ROOT just fine. Conversely, if there are + processes left (for example because this wrapper's child forked), + we have to leave NEW_ROOT behind so that those processes can still + access their root file system (XXX). */ + else if (waitpid (-1 , &status_other, WNOHANG) == -1) + { + chdir ("/"); /* avoid EBUSY */ + rm_rf (new_root); + } + free (new_root); if (WIFEXITED (status)) @@ -490,6 +518,9 @@ exec_with_loader (const char *store, int argc, char *argv[]) setenv ("FAKECHROOT_BASE", new_root, 1); + /* Become the new parent of grand-children when their parent dies. */ + prctl (PR_SET_CHILD_SUBREAPER, 1); + pid_t child = fork (); switch (child) { @@ -507,11 +538,18 @@ exec_with_loader (const char *store, int argc, char *argv[]) default: { - int status; + int status, status_other; waitpid (child, &status, 0); - chdir ("/"); /* avoid EBUSY */ - rm_rf (new_root); - free (new_root); + + /* If there are child processes still running, leave NEW_ROOT around + so they can still access it. XXX: In that case NEW_ROOT is left + behind. 
*/ + if (waitpid (-1 , &status_other, WNOHANG) == -1) + { + chdir ("/"); /* avoid EBUSY */ + rm_rf (new_root); + free (new_root); + } close (2); /* flushing stderr should be silent */ diff --git a/tests/guix-pack-relocatable.sh b/tests/guix-pack-relocatable.sh index a960ecd209..88cbe63b59 100644 --- a/tests/guix-pack-relocatable.sh +++ b/tests/guix-pack-relocatable.sh @@ -58,6 +58,19 @@ run_without_store () fi } +# Wait for the given file to show up. Error out if it doesn't show up in a +# timely fashion. +wait_for_file () +{ + i=0 + while ! test -f "$1" && test $i -lt 20 + do + sleep 0.3 + i=`expr $i + 1` + done + test -f "$1" +} + test_directory="`mktemp -d`" export test_directory trap 'chmod -Rf +w "$test_directory"; rm -rf "$test_directory"' EXIT @@ -129,6 +142,65 @@ case "`uname -m`" in ;; esac +if unshare -r true +then + # Check what happens if the wrapped binary forks and leaves child + # processes behind, like a daemon. The root file system should remain + # available to those child processes. See <https://bugs.gnu.org/44261>. 
+ cat > "$test_directory/manifest.scm" <<EOF +(use-modules (guix)) + +(define daemon + (program-file "daemon" + #~(begin + (use-modules (ice-9 match) + (ice-9 ftw)) + + (call-with-output-file "parent-store" + (lambda (port) + (write (scandir (ungexp (%store-prefix))) + port))) + + (match (primitive-fork) + (0 (sigaction SIGHUP (const #t)) + (call-with-output-file "pid" + (lambda (port) + (display (getpid) port))) + (pause) + (call-with-output-file "child-store" + (lambda (port) + (write (scandir (ungexp (%store-prefix))) + port)))) + (_ #t))))) + +(define package + (computed-file "package" + #~(let ((out (ungexp output))) + (mkdir out) + (mkdir (string-append out "/bin")) + (symlink (ungexp daemon) + (string-append out "/bin/daemon"))))) + +(manifest (list (manifest-entry + (name "daemon") + (version "0") + (item package)))) +EOF + + tarball="$(guix pack -S /bin=bin -R -m "$test_directory/manifest.scm")" + (cd "$test_directory"; tar xf "$tarball") + + # Run '/bin/daemon', which forks, then wait for the child, send it SIGHUP + # so that it dumps its view of the store, and make sure the child and + # parent both see the same store contents. + (cd "$test_directory"; run_without_store ./bin/daemon) + wait_for_file "$test_directory/pid" + kill -HUP $(cat "$test_directory/pid") + diff -u "$test_directory/parent-store" "$test_directory/child-store" + + chmod -Rf +w "$test_directory"; rm -rf "$test_directory"/* +fi + # Ensure '-R' works with outputs other than "out". tarball="`guix pack -R -S /share=share groff:doc`" (cd "$test_directory"; tar xf "$tarball")