All modern distributions require devtmpfs on /dev, but devtmpfs can't be mounted from a user namespace (userns). This patch bind-mounts the host /dev. This is secure because permissions are handled according to the uid and gid maps of the user namespace.
This patch removes old hacks about devices. They are not required any more. Signed-off-by: Andrey Vagin <ava...@openvz.org> --- etc/dists/scripts/prestart.sh | 4 --- src/lib/hooks_ct.c | 66 +++++++++++-------------------------------- 2 files changed, 16 insertions(+), 54 deletions(-) diff --git a/etc/dists/scripts/prestart.sh b/etc/dists/scripts/prestart.sh index 5ab7895..8b2a0a3 100755 --- a/etc/dists/scripts/prestart.sh +++ b/etc/dists/scripts/prestart.sh @@ -41,10 +41,6 @@ fixup_udev() fi break done - - umount /dev/pts - umount /dev/shm - umount /dev -l } fixup_loginuid() diff --git a/src/lib/hooks_ct.c b/src/lib/hooks_ct.c index 2a0b54c..ab2f4fd 100644 --- a/src/lib/hooks_ct.c +++ b/src/lib/hooks_ct.c @@ -10,6 +10,7 @@ #include <fcntl.h> #include <sched.h> #include <dirent.h> +#include <sys/vfs.h> #include "vzerror.h" #include "env.h" @@ -108,7 +109,7 @@ int ct_chroot(const char *root) * Linux kernel commit 5ff9d8a6 * "vfs: Lock in place mounts from more privileged users" */ - if (mount(root, root, NULL, MS_BIND, NULL)) { + if (mount(root, root, NULL, MS_BIND | MS_REC, NULL)) { logger(-1, errno, "Can't bind-mount root %s", root); return ret; } @@ -269,51 +270,6 @@ out: return ret; } -/* - * Those devices should exist in the container, and be valid device nodes with - * user access permission. But we need to be absolutely sure this is the case, - * so we will provide our own versions. That could actually happen since some - * distributions may come with emptied /dev's, waiting for udev to populate them. - * That won't happen, we do it ourselves. - */ -static void create_devices(vps_handler *h, envid_t veid, const char *root) -{ - unsigned int i; - char *devices[] = { - "/dev/null", - "/dev/zero", - "/dev/random", - "/dev/urandom", - }; - - /* - * We will tolerate errors, and keep the container running, because it is - * likely we will be able to boot it to a barely functional state. 
But - * be vocal about it - */ - for (i = 0; i < ARRAY_SIZE(devices); i++) { - char ct_devname[STR_SIZE]; - int ret; - - snprintf(ct_devname, sizeof(ct_devname), "%s%s", root, devices[i]); - - /* - * No need to be crazy about file flags. When we bind mount, the - * source permissions will be inherited. - */ - ret = open(ct_devname, O_RDWR|O_CREAT, 0); - if (ret < 0) { - logger(-1, errno, "Could not touch device %s", devices[i]); - continue; - } - close(ret); - - ret = mount(devices[i], ct_devname, "", MS_BIND, 0); - if (ret < 0) - logger(-1, errno, "Could not bind mount device %s", devices[i]); - } -} - static int _env_create(void *data) { struct arg_start *arg = data; @@ -338,10 +294,6 @@ static int _env_create(void *data) if (arg->userns_p != -1) close(arg->userns_p); - if (arg->h->can_join_userns) { - create_devices(arg->h, arg->veid, arg->res->fs.root); - } - ret = ct_chroot(arg->res->fs.root); /* Probably means chroot failed */ if (ret) @@ -400,11 +352,25 @@ static int ct_env_create_real(struct arg_start *arg) userns_p[0] = userns_p[1] = -1; } else { + char devpath[PATH_MAX]; + clone_flags |= CLONE_NEWUSER; if (pipe(userns_p) < 0) { logger(-1, errno, "Can not create userns pipe"); return VZ_RESOURCE_ERROR; } + + /* Unshare mntns to not affect the host system */ + if (unshare(CLONE_NEWNS)) { + logger(-1, errno, "Can not unshare mount namespace"); + return VZ_RESOURCE_ERROR; + } + + snprintf(devpath, sizeof(devpath), "%s/dev", arg->res->fs.root); + if (mount("dev", devpath, "devtmpfs", 0, NULL)) { + logger(-1, errno, "Can not mount devtmpfs"); + return VZ_RESOURCE_ERROR; + } } arg->userns_p = userns_p[0]; -- 1.8.3.1 _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel