The following pull request was submitted through Github. It can be accessed and reviewed at: https://github.com/lxc/lxd/pull/8042
This e-mail was sent by the LXC bot, direct replies will not reach the author unless they happen to be subscribed to this list. === Description (from pull-request) === Signed-off-by: Christian Brauner <christian.brau...@ubuntu.com>
From b76ffcd374cf5577892fff39a13fa56e33ee117f Mon Sep 17 00:00:00 2001 From: Christian Brauner <christian.brau...@ubuntu.com> Date: Fri, 16 Oct 2020 10:21:55 +0200 Subject: [PATCH 1/3] seccomp: switch back to pread() process_vm_readv() can read memory from another task if it goes away. Signed-off-by: Christian Brauner <christian.brau...@ubuntu.com> --- lxd/seccomp/seccomp.go | 112 ++++++++++++++++++----------------------- 1 file changed, 49 insertions(+), 63 deletions(-) diff --git a/lxd/seccomp/seccomp.go b/lxd/seccomp/seccomp.go index 7a4553dee2..3959fab4b4 100644 --- a/lxd/seccomp/seccomp.go +++ b/lxd/seccomp/seccomp.go @@ -1620,66 +1620,67 @@ func (s *Server) HandleMountSyscall(c Instance, siov *Iovec) int { defer pidFd.Close() } - buf1 := [4096]C.char{} - buf2 := [4096]C.char{} - buf3 := [4096]C.char{} - buf4 := [4096]C.char{} - - // process_vm_readv() doesn't like crossing page boundaries when - // reading individual syscall args. - bufSize := 4096 - if bufSize > pageSize { - bufSize = pageSize - } - - mntSource := buf1[:bufSize] - mntTarget := buf2[:bufSize] - mntFs := buf3[:bufSize] - mntData := buf4[:bufSize] - - localIov := []unix.Iovec{ - {Base: (*byte)(unsafe.Pointer(&mntSource[0]))}, - {Base: (*byte)(unsafe.Pointer(&mntTarget[0]))}, - {Base: (*byte)(unsafe.Pointer(&mntFs[0]))}, - {Base: (*byte)(unsafe.Pointer(&mntData[0]))}, - } - - remoteIov := []unix.RemoteIovec{ - {Base: uintptr(siov.req.data.args[0])}, - {Base: uintptr(siov.req.data.args[1])}, - {Base: uintptr(siov.req.data.args[2])}, - {Base: uintptr(siov.req.data.args[4])}, - } + mntSource := [unix.PathMax]C.char{} + mntTarget := [unix.PathMax]C.char{} + mntFs := [unix.PathMax]C.char{} + mntData := [unix.PathMax]C.char{} + // const char *source if siov.req.data.args[0] != 0 { - localIov[0].SetLen(bufSize) - remoteIov[0].Len = bufSize + _, err := C.pread(C.int(siov.memFd), unsafe.Pointer(&mntSource[0]), C.size_t(unix.PathMax), C.off_t(siov.req.data.args[0])) + if err != nil { + ctx["err"] = fmt.Sprintf("Failed to read source path for of mount syscall: %s", err) + ctx["syscall_continue"] = "true" + C.seccomp_notify_update_response(siov.resp, 0, C.uint32_t(seccompUserNotifFlagContinue)) + return 0 + } } + args.source = C.GoString(&mntSource[0]) + ctx["source"] = args.source + // const char *target if siov.req.data.args[1] != 0 { - localIov[1].SetLen(bufSize) - remoteIov[1].Len = bufSize + _, err := C.pread(C.int(siov.memFd), unsafe.Pointer(&mntTarget[0]), C.size_t(unix.PathMax), C.off_t(siov.req.data.args[1])) + if err != nil { + ctx["err"] = fmt.Sprintf("Failed to read target path for of mount syscall: %s", err) + ctx["syscall_continue"] = "true" + C.seccomp_notify_update_response(siov.resp, 0, C.uint32_t(seccompUserNotifFlagContinue)) + return 0 + } } + args.target = C.GoString(&mntTarget[0]) + ctx["target"] = args.target - if siov.req.data.args[2] != 0 { - localIov[2].SetLen(bufSize) - remoteIov[2].Len = bufSize + // const char *filesystemtype + if siov.req.data.args[1] != 0 { + _, err := C.pread(C.int(siov.memFd), unsafe.Pointer(&mntFs[0]), C.size_t(unix.PathMax), C.off_t(siov.req.data.args[2])) + if err != nil { + ctx["err"] = fmt.Sprintf("Failed to read fstype for of mount syscall: %s", err) + ctx["syscall_continue"] = "true" + C.seccomp_notify_update_response(siov.resp, 0, C.uint32_t(seccompUserNotifFlagContinue)) + return 0 + } } + args.fstype = C.GoString(&mntFs[0]) + ctx["fstype"] = args.fstype - if siov.req.data.args[4] != 0 { - localIov[3].SetLen(bufSize) - remoteIov[3].Len = bufSize - } + // unsigned long mountflags + args.flags = int(siov.req.data.args[3]) - _, err := unix.ProcessVMReadv(args.pid, localIov, remoteIov, 0) - if err != nil { - ctx["err"] = fmt.Sprintf("Failed to read process memory of mount syscall: %s", err) - ctx["syscall_continue"] = "true" - C.seccomp_notify_update_response(siov.resp, 0, C.uint32_t(seccompUserNotifFlagContinue)) - return 0 + // const void *data + if siov.req.data.args[4] != 0 { + _, err := C.pread(C.int(siov.memFd), unsafe.Pointer(&mntData[0]), C.size_t(unix.PathMax), C.off_t(siov.req.data.args[4])) + if err != nil { + ctx["err"] = fmt.Sprintf("Failed to read mount data for of mount syscall: %s", err) + ctx["syscall_continue"] = "true" + C.seccomp_notify_update_response(siov.resp, 0, C.uint32_t(seccompUserNotifFlagContinue)) + return 0 + } } + args.data = C.GoString(&mntData[0]) + ctx["data"] = args.data - err = shared.PidfdSendSignal(int(pidFd.Fd()), 0, 0) + err := shared.PidfdSendSignal(int(pidFd.Fd()), 0, 0) if err != nil { ctx["err"] = fmt.Sprintf("Failed to send signal to target process for of mount syscall: %s", err) ctx["syscall_continue"] = "true" @@ -1687,21 +1688,6 @@ func (s *Server) HandleMountSyscall(c Instance, siov *Iovec) int { return 0 } - // const char *source - args.source = C.GoString(&mntSource[0]) - ctx["source"] = args.source - // const char *target - args.target = C.GoString(&mntTarget[0]) - ctx["target"] = args.target - // const char *filesystemtype - args.fstype = C.GoString(&mntFs[0]) - ctx["fstype"] = args.fstype - // unsigned long mountflags - args.flags = int(siov.req.data.args[3]) - // const void *data - args.data = C.GoString(&mntData[0]) - ctx["data"] = args.data - ok, fuseBinary := s.MountSyscallValid(c, &args) if !ok { ctx["syscall_continue"] = "true" From 1dd4a0b1705fcf55f581ff0dcfceb5ec24979098 Mon Sep 17 00:00:00 2001 From: Christian Brauner <christian.brau...@ubuntu.com> Date: Fri, 16 Oct 2020 10:54:26 +0200 Subject: [PATCH 2/3] nsexec: simplify userns attach Signed-off-by: Christian Brauner <christian.brau...@ubuntu.com> --- lxd/main_nsexec.go | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/lxd/main_nsexec.go b/lxd/main_nsexec.go index 3918547b57..3bfe019832 100644 --- a/lxd/main_nsexec.go +++ b/lxd/main_nsexec.go @@ -142,7 +142,7 @@ int preserve_ns(pid_t pid, int ns_fd, const char *ns) // If the two processes are not in the same namespace returns an fd to the // namespace of the second process identified by @pid2. If the two processes are // in the same namespace returns -EINVAL, -1 if an error occurred. -static int in_same_namespace(pid_t pid1, pid_t pid2, int ns_fd_pid2, const char *ns) +static int in_same_namespace(pid_t pid1, int ns_fd_pid2, const char *ns) { __do_close int ns_fd1 = -EBADF, ns_fd2 = -EBADF; int ret = -1; @@ -158,7 +158,7 @@ static int in_same_namespace(pid_t pid1, pid_t pid2, int ns_fd_pid2, const char return -1; } - ns_fd2 = preserve_ns(pid2, ns_fd_pid2, ns); + ns_fd2 = preserve_ns(-ESRCH, ns_fd_pid2, ns); if (ns_fd2 < 0) return -1; @@ -178,12 +178,12 @@ static int in_same_namespace(pid_t pid1, pid_t pid2, int ns_fd_pid2, const char return move_fd(ns_fd2); } -static void __attach_userns(int pid, int ns_fd) +void attach_userns_fd(int ns_fd) { __do_close int userns_fd = -EBADF; int ret; - userns_fd = in_same_namespace(getpid(), pid, ns_fd, "user"); + userns_fd = in_same_namespace(getpid(), ns_fd, "user"); if (userns_fd < 0) { if (userns_fd == -EINVAL) return; @@ -216,16 +216,6 @@ static void __attach_userns(int pid, int ns_fd) } } -void attach_userns(int pid) -{ - return __attach_userns(pid, -EBADF); -} - -void attach_userns_fd(int ns_fd) -{ - return __attach_userns(-1, ns_fd); -} - int pidfd_nsfd(int pidfd, pid_t pid) { __do_close int ns_fd = -EBADF; From d57813b900c4a9ae3b55b22fb87baf18e1780f2b Mon Sep 17 00:00:00 2001 From: Christian Brauner <christian.brau...@ubuntu.com> Date: Fri, 16 Oct 2020 12:17:16 +0200 Subject: [PATCH 3/3] forksyscall: preserve root and cwd fds for shifted mount emulation Otherwise we won't be able to reacquire the root and current working directory of the target task. Signed-off-by: Christian Brauner <christian.brau...@ubuntu.com> --- lxd/main_forksyscall.go | 43 +++++++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/lxd/main_forksyscall.go b/lxd/main_forksyscall.go index a5b9c565da..be2839f2a0 100644 --- a/lxd/main_forksyscall.go +++ b/lxd/main_forksyscall.go @@ -57,7 +57,8 @@ static bool chdirchroot_in_mntns(int cwd_fd, int root_fd) return true; } -static bool acquire_basic_creds(pid_t pid, int pidfd, int ns_fd) +static bool acquire_basic_creds(pid_t pid, int pidfd, int ns_fd, + int *rootfd, int *cwdfd) { __do_close int cwd_fd = -EBADF, root_fd = -EBADF; char buf[256]; @@ -75,7 +76,27 @@ static bool acquire_basic_creds(pid_t pid, int pidfd, int ns_fd) if (!change_namespaces(pidfd, ns_fd, CLONE_NEWNS)) return false; - return chdirchroot_in_mntns(cwd_fd, root_fd); + if (!chdirchroot_in_mntns(cwd_fd, root_fd)) + return false; + + if (rootfd) + *rootfd = move_fd(root_fd); + + if (cwdfd) + *cwdfd = move_fd(cwd_fd); + return true; +} + +static bool reacquire_basic_creds(int pidfd, int ns_fd, + int root_fd, int cwd_fd) +{ + if (!change_namespaces(pidfd, ns_fd, CLONE_NEWNS)) + return false; + + if (!chdirchroot_in_mntns(cwd_fd, root_fd)) + return false; + + return true; } static bool acquire_final_creds(pid_t pid, uid_t uid, gid_t gid, uid_t fsuid, gid_t fsgid) @@ -157,7 +178,7 @@ static void mknod_emulate(void) fsuid = atoi(advance_arg(true)); fsgid = atoi(advance_arg(true)); - if (!acquire_basic_creds(pid, pidfd, ns_fd)) { + if (!acquire_basic_creds(pid, pidfd, ns_fd, NULL, NULL)) { fprintf(stderr, "%d", ENOANO); _exit(EXIT_FAILURE); } @@ -257,7 +278,7 @@ static void setxattr_emulate(void) size = atoi(advance_arg(true)); data = advance_arg(true); - if (!acquire_basic_creds(pid, pidfd, ns_fd)) { + if (!acquire_basic_creds(pid, pidfd, ns_fd, NULL, NULL)) { fprintf(stderr, "%d", ENOANO); _exit(EXIT_FAILURE); } @@ -336,7 +357,8 @@ static int make_tmpfile(char *template, bool dir) static void mount_emulate(void) { - __do_close int mnt_fd = -EBADF, pidfd = -EBADF, ns_fd = -EBADF; + __do_close int mnt_fd = -EBADF, pidfd = -EBADF, ns_fd = -EBADF, + root_fd = -EBADF, cwd_fd = -EBADF; char *source = NULL, *shiftfs = NULL, *target = NULL, *fstype = NULL; bool use_fuse; uid_t nsuid = -1, uid = -1, nsfsuid = -1, fsuid = -1; @@ -384,7 +406,7 @@ static void mount_emulate(void) change_namespaces(pidfd, ns_fd, CLONE_NEWPID); } - if (!acquire_basic_creds(pid, pidfd, ns_fd)) + if (!acquire_basic_creds(pid, pidfd, ns_fd, &root_fd, &cwd_fd)) _exit(EXIT_FAILURE); if (!acquire_final_creds(pid, uid, gid, fsuid, fsgid)) @@ -441,8 +463,13 @@ static void mount_emulate(void) _exit(EXIT_FAILURE); } - attach_userns_fd(ns_fd); - if (!acquire_basic_creds(pid, pidfd, ns_fd)) { + if (!change_namespaces(pidfd, ns_fd, CLONE_NEWUSER)) { + umount2(target, MNT_DETACH); + umount2(target, MNT_DETACH); + _exit(EXIT_FAILURE); + } + + if (!reacquire_basic_creds(pidfd, ns_fd, root_fd, cwd_fd)) { umount2(target, MNT_DETACH); umount2(target, MNT_DETACH); _exit(EXIT_FAILURE);
_______________________________________________ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel