Avoided leaking file descriptors in Mesos containerizer. This patch explicitly closes not required file descriptors when forking a Mesos containerizer instance. We currently only pass on stdin, stout, and sterr.
While it would in principle be possible to open all file descriptors with `O_CLOEXEC`, this is not practical as * `O_CLOEXEC` is not supported on BSDs (not Windows either, but file descriptor leaks are prevented there in a different way), and * we might call thirdparty code outside of our control which does not use e.g., `O_CLOEXEC` either (the case e.g., for leveldb). Closing all file descriptors after the fork avoids leaks even in above scenarios. Review: https://reviews.apache.org/r/67137/ Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/3f60c985 Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/3f60c985 Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/3f60c985 Branch: refs/heads/master Commit: 3f60c9857f8f68c3d2330ff3d03c8aa84b630db7 Parents: a666047 Author: Benjamin Bannier <benjamin.bann...@mesosphere.io> Authored: Mon Jun 25 20:08:48 2018 +0200 Committer: Benjamin Bannier <bbann...@apache.org> Committed: Mon Jun 25 20:08:48 2018 +0200 ---------------------------------------------------------------------- src/slave/containerizer/mesos/launch.cpp | 56 ++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/3f60c985/src/slave/containerizer/mesos/launch.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/mesos/launch.cpp b/src/slave/containerizer/mesos/launch.cpp index cec6558..65b795a 100644 --- a/src/slave/containerizer/mesos/launch.cpp +++ b/src/slave/containerizer/mesos/launch.cpp @@ -35,8 +35,8 @@ #include <stout/foreach.hpp> #include <stout/option.hpp> #include <stout/os.hpp> -#include <stout/protobuf.hpp> #include <stout/path.hpp> +#include <stout/protobuf.hpp> #include <stout/stringify.hpp> #include <stout/try.hpp> #include <stout/unreachable.hpp> @@ -74,6 +74,7 @@ using std::cerr; using std::endl; +using std::list; using std::set; using std::string; using std::vector; @@ -516,6 +517,40 @@ static Try<Nothing> enterChroot(const string& rootfs) } +// On Windows all new processes create by Mesos go through the +// `create_process` wrapper which with the completion of MESOS-8926 +// will prevent inadvertent leaks making this code unnecessary there. +// +// TODO(bbannier): Consider moving this to stout as e.g., `os::lsof`. +#ifndef __WINDOWS__ +static Try<hashset<int_fd>> getOpenFileDescriptors() +{ + Try<list<string>> fds = +#if defined(__linux__) + os::ls("/proc/self/fd"); +#elif defined(__APPLE__) + os::ls("/dev/fd"); +#endif + + if (fds.isError()) { + return Error(fds.error()); + } + + hashset<int_fd> result; + foreach (const string& fd, fds.get()) { + Try<int_fd> fd_ = numify<int_fd>(fd); + + if (fd_.isError()) { + return Error("Could not interpret file descriptor: " + fd_.error()); + } + + result.insert(fd_.get()); + } + + return result; +} +#endif // __WINDOWS__ + int MesosContainerizerLaunch::execute() { if (flags.help) { @@ -1067,6 +1102,18 @@ int MesosContainerizerLaunch::execute() } #ifndef __WINDOWS__ + // Construct a set of file descriptors to close before `exec`'ing. + Try<hashset<int_fd>> fds = getOpenFileDescriptors(); + CHECK_SOME(fds); + + std::set<int_fd> whitelistedFds = { + STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO}; + + // Exclude required file descriptors from closing. + foreach (int_fd fd, whitelistedFds) { + fds->erase(fd); + } + // If we have `containerStatusFd` set, then we need to fork-exec the // command we are launching and checkpoint its status on exit. We // use fork-exec directly (as opposed to `process::subprocess()`) to @@ -1137,6 +1184,13 @@ int MesosContainerizerLaunch::execute() os::close(containerStatusFd.get()); ::_exit(EXIT_SUCCESS); } + + // Avoid leaking not required file descriptors into the forked process. + foreach (int_fd fd, fds.get()) { + // We use the unwrapped `::close` as opposed to `os::close` + // since the former is guaranteed to be async signal safe. + ::close(fd); + } } #endif // __WINDOWS__