The following pull request was submitted through GitHub. It can be accessed and reviewed at: https://github.com/lxc/lxd/pull/7722
This e-mail was sent by the LXC bot; direct replies will not reach the author unless they happen to be subscribed to this list. === Description (from pull-request) === In nested workloads where the outer LXD hasn't used shiftfs, the container's rootfs won't be marked for shifting, and since the inner container hasn't mounted the rootfs itself, its user namespace doesn't own the superblock of the rootfs filesystem. Since we require CAP_SYS_ADMIN in the owning user namespace of the superblock, shiftfs isn't useable for the inner LXD. Handle that case. Signed-off-by: Christian Brauner <christian.brau...@ubuntu.com>
From 3003758417555f58062eeb75e4494623a31e2f8a Mon Sep 17 00:00:00 2001 From: Christian Brauner <christian.brau...@ubuntu.com> Date: Tue, 4 Aug 2020 00:34:39 +0200 Subject: [PATCH] daemon: check whether shiftfs is useable In nested workloads where the outer LXD hasn't used shiftfs the container's rootfs won't be marked for shifting and since the inner container hasn't mounted the rootfs itself it's user namespace doesn't own the superblock of the rootfs filesystem. Since we require CAP_SYS_ADMIN in the owning user namespace of the superblock shiftfs isn't useable for the inner LXD. Handle that case. Signed-off-by: Christian Brauner <christian.brau...@ubuntu.com> --- lxd/daemon.go | 2 +- lxd/main_checkfeature.go | 42 ++++++++++++++++++++++++++++++++++++++++ lxd/main_forksyscall.go | 22 +-------------------- lxd/main_nsexec.go | 2 +- 4 files changed, 45 insertions(+), 23 deletions(-) diff --git a/lxd/daemon.go b/lxd/daemon.go index 9e14c168a5..dde10b17e4 100644 --- a/lxd/daemon.go +++ b/lxd/daemon.go @@ -694,7 +694,7 @@ func (d *Daemon) init() error { if shared.IsTrue(os.Getenv("LXD_SHIFTFS_DISABLE")) { logger.Infof(" - shiftfs support: disabled") } else { - if util.HasFilesystem("shiftfs") || util.LoadModule("shiftfs") == nil { + if canUseShiftfs() && (util.HasFilesystem("shiftfs") || util.LoadModule("shiftfs") == nil) { d.os.Shiftfs = true logger.Infof(" - shiftfs support: yes") } else { diff --git a/lxd/main_checkfeature.go b/lxd/main_checkfeature.go index b2576b6296..ffabdfe344 100644 --- a/lxd/main_checkfeature.go +++ b/lxd/main_checkfeature.go @@ -42,10 +42,12 @@ __ro_after_init bool netnsid_aware = false; __ro_after_init bool pidfd_aware = false; __ro_after_init bool uevent_aware = false; __ro_after_init int seccomp_notify_aware = 0; +__ro_after_init bool shiftfs_useable = false; __ro_after_init char errbuf[4096]; extern int can_inject_uevent(const char *uevent, size_t len); extern int wait_for_pid(pid_t pid); +extern int preserve_ns(pid_t pid, int ns_fd, const 
char *ns); static int netns_set_nsid(int fd) { @@ -326,6 +328,37 @@ static void is_pidfd_aware(void) pidfd_aware = true; } +static void is_shiftfs_useable(void) +{ + int hostmntns_fd; + + hostmntns_fd = preserve_ns(getpid(), -EBADF, "mnt"); + if (hostmntns_fd < 0) { + (void)sprintf(errbuf, "%m - %s", "Failed to preserve host mount namespace"); + return; + } + + if (unshare(CLONE_NEWNS) < 0) { + (void)sprintf(errbuf, "%m - %s", "Failed to unshare mount namespace"); + return; + } + + if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0) < 0) { + (void)sprintf(errbuf, "%m - %s", "Failed to turn \"/\" into private mount"); + goto on_error; + } + + if (mount("/", "/", "shiftfs", 0, "mark") == 0) + shiftfs_useable = true; + else + (void)sprintf(errbuf, "%m - %s", "Failed to mount shiftfs"); + umount2("/", MNT_DETACH); + +on_error: + if (setns(hostmntns_fd, CLONE_NEWNS) < 0) + (void)sprintf(errbuf, "%m - %s", "Failed to attach to host mount namespace"); +} + void checkfeature(void) { __do_close int hostnetns_fd = -EBADF, newnetns_fd = -EBADF; @@ -334,6 +367,7 @@ void checkfeature(void) is_pidfd_aware(); is_uevent_aware(); is_seccomp_notify_aware(); + is_shiftfs_useable(); if (setns(hostnetns_fd, CLONE_NEWNET) < 0) (void)sprintf(errbuf, "%s", "Failed to attach to host network namespace"); @@ -370,3 +404,11 @@ func canUseSeccompListenerContinue() bool { func canUsePidFds() bool { return bool(C.pidfd_aware) } + +func canUseShiftfs() bool { + if !bool(C.shiftfs_useable) { + logger.Debugf("%s", C.GoString(&C.errbuf[0])) + } + + return bool(C.shiftfs_useable) +} diff --git a/lxd/main_forksyscall.go b/lxd/main_forksyscall.go index aa76fe36f0..bb148fa7d2 100644 --- a/lxd/main_forksyscall.go +++ b/lxd/main_forksyscall.go @@ -36,6 +36,7 @@ extern char* advance_arg(bool required); extern void attach_userns_fd(int ns_fd); extern int pidfd_nsfd(int pidfd, pid_t pid); extern bool setnsat(int ns_fd, const char *ns); +extern int preserve_ns(const int pid, const char *ns); static bool 
chdirchroot_in_mntns(int cwd_fd, int root_fd) { @@ -337,27 +338,6 @@ static int make_tmpfile(char *template, bool dir) return 0; } -static int preserve_ns(const int pid, const char *ns) -{ - int ret; -// 5 /proc + 21 /int_as_str + 3 /ns + 20 /NS_NAME + 1 \0 -#define __NS_PATH_LEN 50 - char path[__NS_PATH_LEN]; - - // This way we can use this function to also check whether namespaces - // are supported by the kernel by passing in the NULL or the empty - // string. - ret = snprintf(path, __NS_PATH_LEN, "/proc/%d/ns%s%s", pid, - !ns || strcmp(ns, "") == 0 ? "" : "/", - !ns || strcmp(ns, "") == 0 ? "" : ns); - if (ret < 0 || (size_t)ret >= __NS_PATH_LEN) { - errno = EFBIG; - return -1; - } - - return open(path, O_RDONLY | O_CLOEXEC); -} - static void mount_emulate(void) { __do_close int mnt_fd = -EBADF, pidfd = -EBADF, ns_fd = -EBADF; diff --git a/lxd/main_nsexec.go b/lxd/main_nsexec.go index 5a652722e2..bbd148ba5a 100644 --- a/lxd/main_nsexec.go +++ b/lxd/main_nsexec.go @@ -144,7 +144,7 @@ int dosetns_file(char *file, char *nstype) { return 0; } -static int preserve_ns(pid_t pid, int ns_fd, const char *ns) +int preserve_ns(pid_t pid, int ns_fd, const char *ns) { int ret; if (ns_fd >= 0)
_______________________________________________ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel