Support using a PID namespace in order to isolate the ebuild processes from the host system, and to make it possible to kill them all easily (similar to cgroups, but easier to use).
Bug: https://bugs.gentoo.org/659582 Signed-off-by: Michał Górny <mgo...@gentoo.org> --- lib/portage/const.py | 1 + lib/portage/package/ebuild/doebuild.py | 8 +++-- lib/portage/process.py | 48 +++++++++++++++++++++++--- man/make.conf.5 | 7 ++++ 4 files changed, 57 insertions(+), 7 deletions(-) New in v2: the code was made independent of mount-sandbox. Instead of making all mounts slaved, it just ensures that /proc is slaved for the purpose of remounting. Failure to slave-mount /proc is considered fatal, as the resulting setup will likely break ebuilds. diff --git a/lib/portage/const.py b/lib/portage/const.py index e0f93f7cc..ca66bc46e 100644 --- a/lib/portage/const.py +++ b/lib/portage/const.py @@ -174,6 +174,7 @@ SUPPORTED_FEATURES = frozenset([ "notitles", "parallel-fetch", "parallel-install", + "pid-sandbox", "prelink-checksums", "preserve-libs", "protect-owned", diff --git a/lib/portage/package/ebuild/doebuild.py b/lib/portage/package/ebuild/doebuild.py index e84a618d2..9917ac82c 100644 --- a/lib/portage/package/ebuild/doebuild.py +++ b/lib/portage/package/ebuild/doebuild.py @@ -1,4 +1,4 @@ -# Copyright 2010-2018 Gentoo Foundation +# Copyright 2010-2018 Gentoo Authors # Distributed under the terms of the GNU General Public License v2 from __future__ import unicode_literals @@ -152,6 +152,7 @@ def _doebuild_spawn(phase, settings, actionmap=None, **kwargs): kwargs['networked'] = 'network-sandbox' not in settings.features or \ phase in _networked_phases or \ 'network-sandbox' in settings['PORTAGE_RESTRICT'].split() + kwargs['pidns'] = 'pid-sandbox' in settings.features if phase == 'depend': kwargs['droppriv'] = 'userpriv' in settings.features @@ -1482,7 +1483,7 @@ def _validate_deps(mysettings, myroot, mydo, mydbapi): # XXX Issue: cannot block execution. Deadlock condition. 
def spawn(mystring, mysettings, debug=False, free=False, droppriv=False, sesandbox=False, fakeroot=False, networked=True, ipc=True, - mountns=False, **keywords): + mountns=False, pidns=False, **keywords): """ Spawn a subprocess with extra portage-specific options. Optiosn include: @@ -1518,6 +1519,8 @@ def spawn(mystring, mysettings, debug=False, free=False, droppriv=False, @type ipc: Boolean @param mountns: Run this command inside mount namespace @type mountns: Boolean + @param pidns: Run this command in isolated PID namespace + @type pidns: Boolean @param keywords: Extra options encoded as a dict, to be passed to spawn @type keywords: Dictionary @rtype: Integer @@ -1551,6 +1554,7 @@ def spawn(mystring, mysettings, debug=False, free=False, droppriv=False, keywords['unshare_net'] = not networked keywords['unshare_ipc'] = not ipc keywords['unshare_mount'] = mountns + keywords['unshare_pid'] = pidns if not networked and mysettings.get("EBUILD_PHASE") != "nofetch" and \ ("network-sandbox-proxy" in features or "distcc" in features): diff --git a/lib/portage/process.py b/lib/portage/process.py index 46868f442..dee126c3c 100644 --- a/lib/portage/process.py +++ b/lib/portage/process.py @@ -223,7 +223,8 @@ def spawn(mycommand, env={}, opt_name=None, fd_pipes=None, returnpid=False, uid=None, gid=None, groups=None, umask=None, logfile=None, path_lookup=True, pre_exec=None, close_fds=(sys.version_info < (3, 4)), unshare_net=False, - unshare_ipc=False, unshare_mount=False, cgroup=None): + unshare_ipc=False, unshare_mount=False, unshare_pid=False, + cgroup=None): """ Spawns a given command. 
@@ -264,6 +265,8 @@ def spawn(mycommand, env={}, opt_name=None, fd_pipes=None, returnpid=False, @param unshare_mount: If True, mount namespace will be unshared and mounts will be private to the namespace @type unshare_mount: Boolean + @param unshare_pid: If True, PID ns will be unshared from the spawned process + @type unshare_pid: Boolean @param cgroup: CGroup path to bind the process to @type cgroup: String @@ -332,7 +335,7 @@ def spawn(mycommand, env={}, opt_name=None, fd_pipes=None, returnpid=False, # This caches the libc library lookup in the current # process, so that it's only done once rather than # for each child process. - if unshare_net or unshare_ipc or unshare_mount: + if unshare_net or unshare_ipc or unshare_mount or unshare_pid: find_library("c") # Force instantiation of portage.data.userpriv_groups before the @@ -348,7 +351,8 @@ def spawn(mycommand, env={}, opt_name=None, fd_pipes=None, returnpid=False, try: _exec(binary, mycommand, opt_name, fd_pipes, env, gid, groups, uid, umask, pre_exec, close_fds, - unshare_net, unshare_ipc, unshare_mount, cgroup) + unshare_net, unshare_ipc, unshare_mount, unshare_pid, + cgroup) except SystemExit: raise except Exception as e: @@ -418,7 +422,8 @@ def spawn(mycommand, env={}, opt_name=None, fd_pipes=None, returnpid=False, return 0 def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, groups, uid, umask, - pre_exec, close_fds, unshare_net, unshare_ipc, unshare_mount, cgroup): + pre_exec, close_fds, unshare_net, unshare_ipc, unshare_mount, unshare_pid, + cgroup): """ Execute a given binary with options @@ -450,6 +455,8 @@ def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, groups, uid, umask, @param unshare_mount: If True, mount namespace will be unshared and mounts will be private to the namespace @type unshare_mount: Boolean + @param unshare_pid: If True, PID ns will be unshared from the spawned process + @type unshare_pid: Boolean @param cgroup: CGroup path to bind the process to @type cgroup: String 
@rtype: None @@ -506,7 +513,7 @@ def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, groups, uid, umask, f.write('%d\n' % os.getpid()) # Unshare (while still uid==0) - if unshare_net or unshare_ipc or unshare_mount: + if unshare_net or unshare_ipc or unshare_mount or unshare_pid: filename = find_library("c") if filename is not None: libc = LoadLibrary(filename) @@ -514,6 +521,7 @@ def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, groups, uid, umask, # from /usr/include/bits/sched.h CLONE_NEWNS = 0x00020000 CLONE_NEWIPC = 0x08000000 + CLONE_NEWPID = 0x20000000 CLONE_NEWNET = 0x40000000 flags = 0 @@ -524,6 +532,9 @@ def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, groups, uid, umask, if unshare_mount: # NEWNS = mount namespace flags |= CLONE_NEWNS + if unshare_pid: + # we also need mount namespace for slave /proc + flags |= CLONE_NEWPID | CLONE_NEWNS try: if libc.unshare(flags) != 0: @@ -531,6 +542,15 @@ def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, groups, uid, umask, errno.errorcode.get(ctypes.get_errno(), '?')), noiselevel=-1) else: + if unshare_pid: + # pid namespace requires us to become init + # TODO: do init-ty stuff + # therefore, fork() ASAP + fork_ret = os.fork() + if fork_ret != 0: + pid, status = os.waitpid(fork_ret, 0) + assert pid == fork_ret + os._exit(status) if unshare_mount: # mark the whole filesystem as slave to avoid # mounts escaping the namespace @@ -541,6 +561,24 @@ def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, groups, uid, umask, # TODO: should it be fatal maybe? 
writemsg("Unable to mark mounts slave: %d\n" % (mount_ret,), noiselevel=-1) + if unshare_pid: + # we need at least /proc being slave + s = subprocess.Popen(['mount', + '--make-slave', '/proc']) + mount_ret = s.wait() + if mount_ret != 0: + # can't proceed with shared /proc + writemsg("Unable to mark /proc slave: %d\n" % (mount_ret,), + noiselevel=-1) + os._exit(1) + # mount new /proc for our namespace + s = subprocess.Popen(['mount', + '-t', 'proc', 'proc', '/proc']) + mount_ret = s.wait() + if mount_ret != 0: + writemsg("Unable to mount new /proc: %d\n" % (mount_ret,), + noiselevel=-1) + os._exit(1) if unshare_net: # 'up' the loopback IFF_UP = 0x1 diff --git a/man/make.conf.5 b/man/make.conf.5 index 7cb5741ad..de04e5e34 100644 --- a/man/make.conf.5 +++ b/man/make.conf.5 @@ -558,6 +558,13 @@ Use finer\-grained locks when installing packages, allowing for greater parallelization. For additional parallelization, disable \fIebuild\-locks\fR. .TP +.B pid\-sandbox +Isolate the process space for the ebuild processes. This makes it +possible to cleanly kill all processes spawned by the ebuild. +Supported only on Linux. Requires PID and mount namespace support +in kernel. /proc is remounted inside the mount namespace to account +for new PID namespace. +.TP .B prelink\-checksums If \fBprelink\fR(8) is installed then use it to undo any prelinks on files before computing checksums for merge and unmerge. This feature is -- 2.19.1