The following pull request was submitted through Github.
It can be accessed and reviewed at: https://github.com/lxc/lxd/pull/8042

This e-mail was sent by the LXC bot, direct replies will not reach the author
unless they happen to be subscribed to this list.

=== Description (from pull-request) ===
Signed-off-by: Christian Brauner <christian.brau...@ubuntu.com>
From b76ffcd374cf5577892fff39a13fa56e33ee117f Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brau...@ubuntu.com>
Date: Fri, 16 Oct 2020 10:21:55 +0200
Subject: [PATCH 1/3] seccomp: switch back to pread()

process_vm_readv() can read memory from another task if it goes away.

Signed-off-by: Christian Brauner <christian.brau...@ubuntu.com>
---
 lxd/seccomp/seccomp.go | 112 ++++++++++++++++++-----------------------
 1 file changed, 49 insertions(+), 63 deletions(-)

diff --git a/lxd/seccomp/seccomp.go b/lxd/seccomp/seccomp.go
index 7a4553dee2..3959fab4b4 100644
--- a/lxd/seccomp/seccomp.go
+++ b/lxd/seccomp/seccomp.go
@@ -1620,66 +1620,67 @@ func (s *Server) HandleMountSyscall(c Instance, siov 
*Iovec) int {
                defer pidFd.Close()
        }
 
-       buf1 := [4096]C.char{}
-       buf2 := [4096]C.char{}
-       buf3 := [4096]C.char{}
-       buf4 := [4096]C.char{}
-
-       // process_vm_readv() doesn't like crossing page boundaries when
-       // reading individual syscall args.
-       bufSize := 4096
-       if bufSize > pageSize {
-               bufSize = pageSize
-       }
-
-       mntSource := buf1[:bufSize]
-       mntTarget := buf2[:bufSize]
-       mntFs := buf3[:bufSize]
-       mntData := buf4[:bufSize]
-
-       localIov := []unix.Iovec{
-               {Base: (*byte)(unsafe.Pointer(&mntSource[0]))},
-               {Base: (*byte)(unsafe.Pointer(&mntTarget[0]))},
-               {Base: (*byte)(unsafe.Pointer(&mntFs[0]))},
-               {Base: (*byte)(unsafe.Pointer(&mntData[0]))},
-       }
-
-       remoteIov := []unix.RemoteIovec{
-               {Base: uintptr(siov.req.data.args[0])},
-               {Base: uintptr(siov.req.data.args[1])},
-               {Base: uintptr(siov.req.data.args[2])},
-               {Base: uintptr(siov.req.data.args[4])},
-       }
+       mntSource := [unix.PathMax]C.char{}
+       mntTarget := [unix.PathMax]C.char{}
+       mntFs := [unix.PathMax]C.char{}
+       mntData := [unix.PathMax]C.char{}
 
+       // const char *source
        if siov.req.data.args[0] != 0 {
-               localIov[0].SetLen(bufSize)
-               remoteIov[0].Len = bufSize
+               _, err := C.pread(C.int(siov.memFd), 
unsafe.Pointer(&mntSource[0]), C.size_t(unix.PathMax), 
C.off_t(siov.req.data.args[0]))
+               if err != nil {
+                       ctx["err"] = fmt.Sprintf("Failed to read source path 
for of mount syscall: %s", err)
+                       ctx["syscall_continue"] = "true"
+                       C.seccomp_notify_update_response(siov.resp, 0, 
C.uint32_t(seccompUserNotifFlagContinue))
+                       return 0
+               }
        }
+       args.source = C.GoString(&mntSource[0])
+       ctx["source"] = args.source
 
+       // const char *target
        if siov.req.data.args[1] != 0 {
-               localIov[1].SetLen(bufSize)
-               remoteIov[1].Len = bufSize
+               _, err := C.pread(C.int(siov.memFd), 
unsafe.Pointer(&mntTarget[0]), C.size_t(unix.PathMax), 
C.off_t(siov.req.data.args[1]))
+               if err != nil {
+                       ctx["err"] = fmt.Sprintf("Failed to read target path 
for of mount syscall: %s", err)
+                       ctx["syscall_continue"] = "true"
+                       C.seccomp_notify_update_response(siov.resp, 0, 
C.uint32_t(seccompUserNotifFlagContinue))
+                       return 0
+               }
        }
+       args.target = C.GoString(&mntTarget[0])
+       ctx["target"] = args.target
 
-       if siov.req.data.args[2] != 0 {
-               localIov[2].SetLen(bufSize)
-               remoteIov[2].Len = bufSize
+       // const char *filesystemtype
+       if siov.req.data.args[1] != 0 {
+               _, err := C.pread(C.int(siov.memFd), unsafe.Pointer(&mntFs[0]), 
C.size_t(unix.PathMax), C.off_t(siov.req.data.args[2]))
+               if err != nil {
+                       ctx["err"] = fmt.Sprintf("Failed to read fstype for of 
mount syscall: %s", err)
+                       ctx["syscall_continue"] = "true"
+                       C.seccomp_notify_update_response(siov.resp, 0, 
C.uint32_t(seccompUserNotifFlagContinue))
+                       return 0
+               }
        }
+       args.fstype = C.GoString(&mntFs[0])
+       ctx["fstype"] = args.fstype
 
-       if siov.req.data.args[4] != 0 {
-               localIov[3].SetLen(bufSize)
-               remoteIov[3].Len = bufSize
-       }
+       // unsigned long mountflags
+       args.flags = int(siov.req.data.args[3])
 
-       _, err := unix.ProcessVMReadv(args.pid, localIov, remoteIov, 0)
-       if err != nil {
-               ctx["err"] = fmt.Sprintf("Failed to read process memory of 
mount syscall: %s", err)
-               ctx["syscall_continue"] = "true"
-               C.seccomp_notify_update_response(siov.resp, 0, 
C.uint32_t(seccompUserNotifFlagContinue))
-               return 0
+       // const void *data
+       if siov.req.data.args[4] != 0 {
+               _, err := C.pread(C.int(siov.memFd), 
unsafe.Pointer(&mntData[0]), C.size_t(unix.PathMax), 
C.off_t(siov.req.data.args[4]))
+               if err != nil {
+                       ctx["err"] = fmt.Sprintf("Failed to read mount data for 
of mount syscall: %s", err)
+                       ctx["syscall_continue"] = "true"
+                       C.seccomp_notify_update_response(siov.resp, 0, 
C.uint32_t(seccompUserNotifFlagContinue))
+                       return 0
+               }
        }
+       args.data = C.GoString(&mntData[0])
+       ctx["data"] = args.data
 
-       err = shared.PidfdSendSignal(int(pidFd.Fd()), 0, 0)
+       err := shared.PidfdSendSignal(int(pidFd.Fd()), 0, 0)
        if err != nil {
                ctx["err"] = fmt.Sprintf("Failed to send signal to target 
process for of mount syscall: %s", err)
                ctx["syscall_continue"] = "true"
@@ -1687,21 +1688,6 @@ func (s *Server) HandleMountSyscall(c Instance, siov 
*Iovec) int {
                return 0
        }
 
-       // const char *source
-       args.source = C.GoString(&mntSource[0])
-       ctx["source"] = args.source
-       // const char *target
-       args.target = C.GoString(&mntTarget[0])
-       ctx["target"] = args.target
-       // const char *filesystemtype
-       args.fstype = C.GoString(&mntFs[0])
-       ctx["fstype"] = args.fstype
-       // unsigned long mountflags
-       args.flags = int(siov.req.data.args[3])
-       // const void *data
-       args.data = C.GoString(&mntData[0])
-       ctx["data"] = args.data
-
        ok, fuseBinary := s.MountSyscallValid(c, &args)
        if !ok {
                ctx["syscall_continue"] = "true"

From 1dd4a0b1705fcf55f581ff0dcfceb5ec24979098 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brau...@ubuntu.com>
Date: Fri, 16 Oct 2020 10:54:26 +0200
Subject: [PATCH 2/3] nsexec: simplify userns attach

Signed-off-by: Christian Brauner <christian.brau...@ubuntu.com>
---
 lxd/main_nsexec.go | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/lxd/main_nsexec.go b/lxd/main_nsexec.go
index 3918547b57..3bfe019832 100644
--- a/lxd/main_nsexec.go
+++ b/lxd/main_nsexec.go
@@ -142,7 +142,7 @@ int preserve_ns(pid_t pid, int ns_fd, const char *ns)
 // If the two processes are not in the same namespace returns an fd to the
 // namespace of the second process identified by @pid2. If the two processes 
are
 // in the same namespace returns -EINVAL, -1 if an error occurred.
-static int in_same_namespace(pid_t pid1, pid_t pid2, int ns_fd_pid2, const 
char *ns)
+static int in_same_namespace(pid_t pid1, int ns_fd_pid2, const char *ns)
 {
        __do_close int ns_fd1 = -EBADF, ns_fd2 = -EBADF;
        int ret = -1;
@@ -158,7 +158,7 @@ static int in_same_namespace(pid_t pid1, pid_t pid2, int 
ns_fd_pid2, const char
                return -1;
        }
 
-       ns_fd2 = preserve_ns(pid2, ns_fd_pid2, ns);
+       ns_fd2 = preserve_ns(-ESRCH, ns_fd_pid2, ns);
        if (ns_fd2 < 0)
                return -1;
 
@@ -178,12 +178,12 @@ static int in_same_namespace(pid_t pid1, pid_t pid2, int 
ns_fd_pid2, const char
        return move_fd(ns_fd2);
 }
 
-static void __attach_userns(int pid, int ns_fd)
+void attach_userns_fd(int ns_fd)
 {
        __do_close int userns_fd = -EBADF;
        int ret;
 
-       userns_fd = in_same_namespace(getpid(), pid, ns_fd, "user");
+       userns_fd = in_same_namespace(getpid(), ns_fd, "user");
        if (userns_fd < 0) {
                if (userns_fd == -EINVAL)
                        return;
@@ -216,16 +216,6 @@ static void __attach_userns(int pid, int ns_fd)
        }
 }
 
-void attach_userns(int pid)
-{
-       return __attach_userns(pid, -EBADF);
-}
-
-void attach_userns_fd(int ns_fd)
-{
-       return __attach_userns(-1, ns_fd);
-}
-
 int pidfd_nsfd(int pidfd, pid_t pid)
 {
        __do_close int ns_fd = -EBADF;

From d57813b900c4a9ae3b55b22fb87baf18e1780f2b Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brau...@ubuntu.com>
Date: Fri, 16 Oct 2020 12:17:16 +0200
Subject: [PATCH 3/3] forksyscall: preserve root and cwd fds for shifted mount
 emulation

Otherwise we won't be able to reacquire the root and current working directory
of the target task.

Signed-off-by: Christian Brauner <christian.brau...@ubuntu.com>
---
 lxd/main_forksyscall.go | 43 +++++++++++++++++++++++++++++++++--------
 1 file changed, 35 insertions(+), 8 deletions(-)

diff --git a/lxd/main_forksyscall.go b/lxd/main_forksyscall.go
index a5b9c565da..be2839f2a0 100644
--- a/lxd/main_forksyscall.go
+++ b/lxd/main_forksyscall.go
@@ -57,7 +57,8 @@ static bool chdirchroot_in_mntns(int cwd_fd, int root_fd)
        return true;
 }
 
-static bool acquire_basic_creds(pid_t pid, int pidfd, int ns_fd)
+static bool acquire_basic_creds(pid_t pid, int pidfd, int ns_fd,
+                               int *rootfd, int *cwdfd)
 {
        __do_close int cwd_fd = -EBADF, root_fd = -EBADF;
        char buf[256];
@@ -75,7 +76,27 @@ static bool acquire_basic_creds(pid_t pid, int pidfd, int 
ns_fd)
        if (!change_namespaces(pidfd, ns_fd, CLONE_NEWNS))
                return false;
 
-       return chdirchroot_in_mntns(cwd_fd, root_fd);
+       if (!chdirchroot_in_mntns(cwd_fd, root_fd))
+               return false;
+
+       if (rootfd)
+               *rootfd = move_fd(root_fd);
+
+       if (cwdfd)
+               *cwdfd = move_fd(cwd_fd);
+       return true;
+}
+
+static bool reacquire_basic_creds(int pidfd, int ns_fd,
+                                 int root_fd, int cwd_fd)
+{
+       if (!change_namespaces(pidfd, ns_fd, CLONE_NEWNS))
+               return false;
+
+       if (!chdirchroot_in_mntns(cwd_fd, root_fd))
+               return false;
+
+       return true;
 }
 
 static bool acquire_final_creds(pid_t pid, uid_t uid, gid_t gid, uid_t fsuid, 
gid_t fsgid)
@@ -157,7 +178,7 @@ static void mknod_emulate(void)
        fsuid = atoi(advance_arg(true));
        fsgid = atoi(advance_arg(true));
 
-       if (!acquire_basic_creds(pid, pidfd, ns_fd)) {
+       if (!acquire_basic_creds(pid, pidfd, ns_fd, NULL, NULL)) {
                fprintf(stderr, "%d", ENOANO);
                _exit(EXIT_FAILURE);
        }
@@ -257,7 +278,7 @@ static void setxattr_emulate(void)
        size = atoi(advance_arg(true));
        data = advance_arg(true);
 
-       if (!acquire_basic_creds(pid, pidfd, ns_fd)) {
+       if (!acquire_basic_creds(pid, pidfd, ns_fd, NULL, NULL)) {
                fprintf(stderr, "%d", ENOANO);
                _exit(EXIT_FAILURE);
        }
@@ -336,7 +357,8 @@ static int make_tmpfile(char *template, bool dir)
 
 static void mount_emulate(void)
 {
-       __do_close int mnt_fd = -EBADF, pidfd = -EBADF, ns_fd = -EBADF;
+       __do_close int mnt_fd = -EBADF, pidfd = -EBADF, ns_fd = -EBADF,
+                  root_fd = -EBADF, cwd_fd = -EBADF;
        char *source = NULL, *shiftfs = NULL, *target = NULL, *fstype = NULL;
        bool use_fuse;
        uid_t nsuid = -1, uid = -1, nsfsuid = -1, fsuid = -1;
@@ -384,7 +406,7 @@ static void mount_emulate(void)
                change_namespaces(pidfd, ns_fd, CLONE_NEWPID);
        }
 
-       if (!acquire_basic_creds(pid, pidfd, ns_fd))
+       if (!acquire_basic_creds(pid, pidfd, ns_fd, &root_fd, &cwd_fd))
                _exit(EXIT_FAILURE);
 
        if (!acquire_final_creds(pid, uid, gid, fsuid, fsgid))
@@ -441,8 +463,13 @@ static void mount_emulate(void)
                        _exit(EXIT_FAILURE);
                }
 
-               attach_userns_fd(ns_fd);
-               if (!acquire_basic_creds(pid, pidfd, ns_fd)) {
+               if (!change_namespaces(pidfd, ns_fd, CLONE_NEWUSER)) {
+                       umount2(target, MNT_DETACH);
+                       umount2(target, MNT_DETACH);
+                       _exit(EXIT_FAILURE);
+               }
+
+               if (!reacquire_basic_creds(pidfd, ns_fd, root_fd, cwd_fd)) {
                        umount2(target, MNT_DETACH);
                        umount2(target, MNT_DETACH);
                        _exit(EXIT_FAILURE);
_______________________________________________
lxc-devel mailing list
lxc-devel@lists.linuxcontainers.org
http://lists.linuxcontainers.org/listinfo/lxc-devel

Reply via email to