guix_mirror_bot pushed a commit to branch master
in repository guix.

commit e0e64be8de3d220a12612b3a2e4aee428277d865
Author: Ludovic Courtès <[email protected]>
AuthorDate: Mon Oct 13 10:39:21 2025 +0200

    linux-container: Remove #:lock-mounts? and related code.
    
    This reverts commits 437bb9ece55f37d4b5a62cafc98c0c3b848a53ce and
    a57ed987ffd1452ba5a4d70feb54893e99b8e076, which were reported in
    guix/guix#1169 to occasionally cause errors like:
    
      guix shell: error: unshare : 268566528: Invalid argument
---
 gnu/build/linux-container.scm  | 111 ++++++++++++++---------------------------
 gnu/system/linux-container.scm |   4 --
 tests/containers.scm           |  33 ++----------
 3 files changed, 41 insertions(+), 107 deletions(-)

diff --git a/gnu/build/linux-container.scm b/gnu/build/linux-container.scm
index b6f8563f7d..0df51c390b 100644
--- a/gnu/build/linux-container.scm
+++ b/gnu/build/linux-container.scm
@@ -190,10 +190,7 @@ for the process."
       (remount-read-only "/"))))
 
 (define* (initialize-user-namespace pid host-uids
-                                    #:key
-                                    (host-uid (getuid))
-                                    (host-gid (getgid))
-                                    (guest-uid 0) (guest-gid 0))
+                                    #:key (guest-uid 0) (guest-gid 0))
   "Configure the user namespace for PID.  HOST-UIDS specifies the number of
 host user identifiers to map into the user namespace.  GUEST-UID and GUEST-GID
 specify the first UID (respectively GID) that host UIDs (respectively GIDs)
@@ -204,21 +201,24 @@ map to in the namespace."
   (define (scope file)
     (string-append proc-dir file))
 
-  ;; Only root can write to the gid map without first disabling the
-  ;; setgroups syscall.
-  (unless (and (zero? host-uid) (zero? host-gid))
-    (call-with-output-file (scope "/setgroups")
-      (lambda (port)
-        (display "deny" port))))
+  (let ((uid (getuid))
+        (gid (getgid)))
+
+    ;; Only root can write to the gid map without first disabling the
+    ;; setgroups syscall.
+    (unless (and (zero? uid) (zero? gid))
+      (call-with-output-file (scope "/setgroups")
+        (lambda (port)
+          (display "deny" port))))
 
-  ;; Map the user/group that created the container to the root user
-  ;; within the container.
-  (call-with-output-file (scope "/uid_map")
-    (lambda (port)
-      (format port "~d ~d ~d" guest-uid host-uid host-uids)))
-  (call-with-output-file (scope "/gid_map")
-    (lambda (port)
-      (format port "~d ~d ~d" guest-gid host-gid host-uids))))
+    ;; Map the user/group that created the container to the root user
+    ;; within the container.
+    (call-with-output-file (scope "/uid_map")
+      (lambda (port)
+        (format port "~d ~d ~d" guest-uid uid host-uids)))
+    (call-with-output-file (scope "/gid_map")
+      (lambda (port)
+        (format port "~d ~d ~d" guest-gid gid host-uids)))))
 
 (define (namespaces->bit-mask namespaces)
   "Return the number suitable for the 'flags' argument of 'clone' that
@@ -239,14 +239,12 @@ corresponds to the symbols in NAMESPACES."
                         #:key (guest-uid 0) (guest-gid 0)
                         (populate-file-system (const #t))
                         (loopback-network? #t)
-                        (lock-mounts? #t)
                         writable-root?)
   "Run THUNK in a new container process and return its PID.  ROOT specifies
 the root directory for the container.  MOUNTS is a list of <file-system>
 objects that specify file systems to mount inside the container.  NAMESPACES
 is a list of symbols that correspond to the possible Linux namespaces: mnt,
-ipc, uts, user, and net.  When LOCK-MOUNTS? is true, arrange so that none of
-MOUNTS can be unmounted or remounted individually from within THUNK.
+ipc, uts, user, and net.
 
 When LOOPBACK-NETWORK? is true and 'net is amount NAMESPACES, set up the
 loopback device (\"lo\") and a minimal /etc/hosts.
@@ -306,28 +304,6 @@ that host UIDs (respectively GIDs) map to in the 
namespace."
                       ;; cannot be 'read' so they shouldn't be written as is.
                       (write args child)
                       (primitive-exit 3))))
-
-                (when (and lock-mounts?
-                           (memq 'mnt namespaces)
-                           (memq 'user namespaces))
-                  ;; Create a new mount namespace owned by a new user
-                  ;; namespace to "lock" together previous mounts, such that
-                  ;; they cannot be unmounted or remounted separately--see
-                  ;; mount_namespaces(7).
-                  ;;
-                  ;; Note: at this point, the process is single-threaded (no
-                  ;; GC mark threads, no finalization thread, etc.) which is
-                  ;; why unshare(CLONE_NEWUSER) can be used.
-                  (let ((uid (getuid)) (gid (getgid)))
-                    (unshare (logior CLONE_NEWUSER CLONE_NEWNS))
-                    (when (file-exists? "/proc/self")
-                      (initialize-user-namespace (getpid)
-                                                 host-uids
-                                                 #:host-uid uid
-                                                 #:host-gid gid
-                                                 #:guest-uid guest-uid
-                                                 #:guest-gid guest-gid))))
-
                 ;; TODO: Manage capabilities.
                 (write 'ready child)
                 (close-port child)
@@ -400,7 +376,6 @@ if there are no child processes left."
 
 (define* (call-with-container mounts thunk #:key (namespaces %namespaces)
                               (host-uids 1) (guest-uid 0) (guest-gid 0)
-                              (lock-mounts? #t)
                               (relayed-signals (list SIGINT SIGTERM))
                               (child-is-pid1? #t)
                               (populate-file-system (const #t))
@@ -485,7 +460,6 @@ load path must be adjusted as needed."
   (call-with-temporary-directory
    (lambda (root)
      (let ((pid (run-container root mounts namespaces host-uids thunk*
-                               #:lock-mounts? lock-mounts?
                                #:guest-uid guest-uid
                                #:guest-gid guest-gid
                                #:populate-file-system populate-file-system
@@ -506,35 +480,24 @@ return the exit status, an integer as returned by 
'waitpid'."
     (0
      (call-with-clean-exit
       (lambda ()
-        ;; First, determine the user namespace that owns the pid namespace and
-        ;; join that user namespace (the assumption is that it also owns all
-        ;; the other namespaces).  It's important that the user namespace is
-        ;; joined first, so that the user will have the privileges to join the
-        ;; other namespaces.
-        (let* ((pid-ns (open-fdes (namespace-file pid "pid")
-                                  (logior O_CLOEXEC O_RDONLY)))
-               (user-ns (get-user-ns pid-ns)))
-          (close-fdes pid-ns)
-          (unless (equal? (stat user-ns)
-                          (stat (namespace-file (getpid) "user")))
-            (setns user-ns 0))
-          (close-fdes user-ns)
-
-          ;; Then join all the remaining namespaces.
-          (for-each (lambda (ns)
-                      (let ((source (namespace-file (getpid) ns))
-                            (target (namespace-file pid ns)))
-                        ;; Joining the namespace that the process already
-                        ;; belongs to would throw an error so avoid that.
-                        ;; XXX: This /proc interface leads to TOCTTOU.
-                        (unless (string=? (readlink source) (readlink target))
-                          (call-with-input-file target
-                            (lambda (new-ns-port)
-                              (setns (fileno new-ns-port) 0))))))
-                    ;; It's important that the mount namespace is joined last,
-                    ;; otherwise the /proc mount point would no longer be
-                    ;; accessible.
-                    '("ipc" "uts" "net" "pid" "mnt")))
+        (for-each (lambda (ns)
+                    (let ((source (namespace-file (getpid) ns))
+                          (target (namespace-file pid ns)))
+                      ;; Joining the namespace that the process already
+                      ;; belongs to would throw an error so avoid that.
+                      ;; XXX: This /proc interface leads to TOCTTOU.
+                      (unless (string=? (readlink source) (readlink target))
+                        (call-with-input-file source
+                          (lambda (current-ns-port)
+                            (call-with-input-file target
+                              (lambda (new-ns-port)
+                                (setns (fileno new-ns-port) 0))))))))
+                  ;; It's important that the user namespace is joined first,
+                  ;; so that the user will have the privileges to join the
+                  ;; other namespaces.  Furthermore, it's important that the
+                  ;; mount namespace is joined last, otherwise the /proc mount
+                  ;; point would no longer be accessible.
+                  '("user" "ipc" "uts" "net" "pid" "mnt"))
         (purify-environment)
         (chdir "/")
 
diff --git a/gnu/system/linux-container.scm b/gnu/system/linux-container.scm
index d16d1e78b5..9bcdf24a7e 100644
--- a/gnu/system/linux-container.scm
+++ b/gnu/system/linux-container.scm
@@ -317,10 +317,6 @@ Run the container with the given options."))
                 #:namespaces (if #$shared-network?
                                  (delq 'net %namespaces)
                                  %namespaces)
-
-                ;; XXX: Work around <https://issues.guix.gnu.org/78356>.
-                #:lock-mounts? #f
-
                 #:writable-root? #t
                 #:process-spawned-hook explain)))))
 
diff --git a/tests/containers.scm b/tests/containers.scm
index 6edea9631d..1e915d517e 100644
--- a/tests/containers.scm
+++ b/tests/containers.scm
@@ -1,6 +1,6 @@
 ;;; GNU Guix --- Functional package management for GNU
 ;;; Copyright © 2015 David Thompson <[email protected]>
-;;; Copyright © 2016-2017, 2019, 2023, 2025 Ludovic Courtès <[email protected]>
+;;; Copyright © 2016, 2017, 2019, 2023 Ludovic Courtès <[email protected]>
 ;;;
 ;;; This file is part of GNU Guix.
 ;;;
@@ -110,26 +110,6 @@
        (assert-exit (file-exists? "/testing")))
      #:namespaces '(user mnt))))
 
-(skip-if-unsupported)
-(test-equal "call-with-container, mnt namespace, locked mounts"
-  EINVAL
-  ;; umount(2) fails with EINVAL when targeting a mount point that is
-  ;; "locked".
-  (status:exit-val
-   (call-with-container (list (file-system
-                                (device "none")
-                                (mount-point "/testing")
-                                (type "tmpfs")
-                                (check? #f)))
-     (lambda ()
-       (primitive-exit (catch 'system-error
-                         (lambda ()
-                           (umount "/testing")
-                           0)
-                         (lambda args
-                           (system-error-errno args)))))
-     #:namespaces '(user mnt))))
-
 (skip-if-unsupported)
 (test-equal "call-with-container, mnt namespace, wrong bind mount"
   `(system-error ,ENOENT)
@@ -189,8 +169,7 @@
      #:namespaces '(user mnt))))
 
 (skip-if-unsupported)
-(test-equal "container-excursion"
-  0
+(test-assert "container-excursion"
   (call-with-temporary-directory
    (lambda (root)
      ;; Two pipes: One for the container to signal that the test can begin,
@@ -214,11 +193,7 @@
                    (readlink (string-append "/proc/" pid "/ns/" ns)))
                  '("user" "ipc" "uts" "net" "pid" "mnt"))))
 
-        (let* ((pid (run-container root '() %namespaces 1 container
-                                   ;; Do not lock mounts so the user namespace
-                                   ;; appears to be the same seen from inside
-                                   ;; and from outside.
-                                   #:lock-mounts? #f))
+        (let* ((pid (run-container root '() %namespaces 1 container))
                (container-namespaces (namespaces pid))
                (result
                 (begin
@@ -238,7 +213,7 @@
           (write 'done end-out)
           (close end-out)
           (waitpid pid)
-          result))))))
+          (zero? result)))))))
 
 (skip-if-unsupported)
 (test-equal "container-excursion, same namespaces"

Reply via email to