Add a fully featured unshare applet that supports all the arguments
accepted by the upstream version.

Signed-off-by: Bartosz Golaszewski <bartekg...@gmail.com>
---
 util-linux/unshare.c | 453 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 453 insertions(+)
 create mode 100644 util-linux/unshare.c

diff --git a/util-linux/unshare.c b/util-linux/unshare.c
new file mode 100644
index 0000000..ea9ec9c
--- /dev/null
+++ b/util-linux/unshare.c
@@ -0,0 +1,453 @@
+/* vi: set sw=4 ts=4: */
+/*
+ * Mini unshare implementation for busybox.
+ *
+ * Copyright (C) 2016 by Bartosz Golaszewski <bartekg...@gmail.com>
+ *
+ * Licensed under GPLv2 or later, see file LICENSE in this source tree.
+ */
+
+//config:config UNSHARE
+//config:      bool "unshare"
+//config:      default y
+//config:      select PLATFORM_LINUX
+//config:      help
+//config:        Run program with some namespaces unshared from parent.
+//config:
+//config:config FEATURE_UNSHARE_LONG_OPTS
+//config:      bool "enable long options"
+//config:      default y
+//config:      depends on UNSHARE && LONG_OPTS
+//config:      help
+//config:        Support long options for the unshare applet. This makes
+//config:        the busybox implementation more compatible with upstream.
+
+//applet:IF_UNSHARE(APPLET(unshare, BB_DIR_USR_BIN, BB_SUID_DROP))
+
+//kbuild:lib-$(CONFIG_UNSHARE) += unshare.o
+
+//usage:#define unshare_trivial_usage
+//usage:       "[options] <program> [args...]"
+//usage:#if ENABLE_FEATURE_UNSHARE_LONG_OPTS
+//usage:#define unshare_full_usage "\n\n"
+//usage:       "Options:"
+//usage:     "\n       -m, --mount[=<file>]            unshare mounts namespace"
+//usage:     "\n       -u, --uts[=<file>]              unshare UTS namespace (hostname etc.)"
+//usage:     "\n       -i, --ipc[=<file>]              unshare System V IPC namespace"
+//usage:     "\n       -n, --network[=<file>]          unshare network namespace"
+//usage:     "\n       -p, --pid[=<file>]              unshare pid namespace"
+//usage:     "\n       -U, --user[=<file>]             unshare user namespace"
+//usage:     "\n       -f, --fork                      fork before launching <program>"
+//usage:     "\n       -M, --mount-proc[=<dir>]        mount proc filesystem first (implies --mount)"
+//usage:     "\n       -r, --map-root-user             map current user to root (implies --user)"
+//usage:     "\n       -P, --propagation slave|shared|private|unchanged"
+//usage:     "\n                                       modify mount propagation in mount namespace"
+//usage:     "\n       -s, --setgroups allow|deny      control the setgroups syscall in user namespaces"
+//usage:#else
+//usage:#define unshare_full_usage "\n\n"
+//usage:       "Options:"
+//usage:     "\n       -m [<file>]     unshare mounts namespace"
+//usage:     "\n       -u [<file>]     unshare UTS namespace (hostname etc.)"
+//usage:     "\n       -i [<file>]     unshare System V IPC namespace"
+//usage:     "\n       -n [<file>]     unshare network namespace"
+//usage:     "\n       -p [<file>]     unshare pid namespace"
+//usage:     "\n       -U [<file>]     unshare user namespace"
+//usage:     "\n       -f              fork before launching <program>"
+//usage:     "\n       -M [<dir>]      mount proc filesystem first (implies -m)"
+//usage:     "\n       -r              map current user to root (implies -U)"
+//usage:     "\n       -P slave|shared|private|unchanged"
+//usage:     "\n                       modify mount propagation in mount namespace"
+//usage:     "\n       -s allow|deny   control the setgroups syscall in user namespaces"
+//usage:#endif
+
+#include "libbb.h"
+
+#include <sched.h>
+#include <sys/types.h>
+#include <sys/mount.h>
+
+/*
+ * Longest possible path to a procfs file used in unshare. Must be able to
+ * contain the '/proc/' string, the '/ns/user' string which is the longest
+ * namespace name and a 32-bit integer representing the process ID.
+ */
+#define PROC_PATH_MAX   (sizeof("/proc//ns/user") + sizeof(pid_t) * 3)
+
+#define PATH_PROC_SETGROUPS    "/proc/self/setgroups"
+#define PATH_PROC_UIDMAP       "/proc/self/uid_map"
+#define PATH_PROC_GIDMAP       "/proc/self/gid_map"
+
+enum { /* Option bits tested against the value returned by getopt32().
+        * Listed in the same order as the option letters in opt_str;
+        * presumably getopt32() assigns bits in that order, so keep both
+        * in sync - TODO confirm against the libbb documentation. */
+       OPT_mount       = BIT( 0),      /* -m, --mount */
+       OPT_uts         = BIT( 1),      /* -u, --uts */
+       OPT_ipc         = BIT( 2),      /* -i, --ipc */
+       OPT_network     = BIT( 3),      /* -n, --network */
+       OPT_pid         = BIT( 4),      /* -p, --pid */
+       OPT_user        = BIT( 5),      /* -U, --user */
+       OPT_fork        = BIT( 6),      /* -f, --fork */
+       OPT_mount_proc  = BIT( 7),      /* -M, --mount-proc */
+       OPT_map_root    = BIT( 8),      /* -r, --map-root-user */
+       OPT_propagation = BIT( 9),      /* -P, --propagation */
+       OPT_setgroups   = BIT(10),      /* -s, --setgroups */
+};
+
+enum { /* Indices into the ns_list[] table defined in unshare_main(). */
+       NS_MNT_POS = 0, /* mount namespace */
+       NS_UTS_POS,     /* UTS namespace */
+       NS_IPC_POS,     /* System V IPC namespace */
+       NS_NET_POS,     /* network namespace */
+       NS_PID_POS,     /* pid namespace */
+       NS_USR_POS,     /* user namespace */
+};
+
+struct namespace {     /* Describes one namespace type that can be unshared. */
+       const int opt;          /* OPT_* bit that requests this namespace */
+       const int flag;         /* CLONE_NEW* flag OR-ed into the unshare() argument */
+       const char *nsfile;     /* file name below /proc/<pid>/ns/ */
+       char *path;             /* bind-mount target stored by getopt32(); NULL if none given */
+};
+
+struct propagation_mode {      /* Maps a propagation mode name to mount flags. */
+       const char *name;       /* mode name compared against the -P argument */
+       unsigned long flags;    /* flags returned by parse_propagation() for that name */
+};
+
+/*
+ * Upstream unshare doesn't support short options for --mount-proc and
+ * --propagation, but let's add them here to let the user use them even with
+ * long options disabled in busybox config.
+ *
+ * The letters appear in the same order as the OPT_* bits and as the pointer
+ * arguments handed to getopt32() in unshare_main().
+ *
+ * NOTE(review): assuming getopt(3)-style syntax, the leading '+' stops option
+ * parsing at the first non-option argument, '::' marks an optional argument
+ * and ':' a required one - confirm against the getopt32() documentation.
+ */
+static const char opt_str[] = "+m::u::i::n::p::U::fM::rP:s:";
+
+/*
+ * Upstream unshare only accepts optional arguments (namespace mountpoints)
+ * for long options. We support them for both short (for size reduction
+ * with LONG_OPTS disabled) and long opts (for upstream compatibility).
+ *
+ * Each table entry consists of the NUL-terminated long option name, the
+ * argument kind and the equivalent short option letter from opt_str. The
+ * table is only built when FEATURE_UNSHARE_LONG_OPTS is enabled.
+ */
+#if ENABLE_FEATURE_UNSHARE_LONG_OPTS
+static const char unshare_longopts[] ALIGN1 =
+       "mount\0"               Optional_argument       "m"
+       "uts\0"                 Optional_argument       "u"
+       "ipc\0"                 Optional_argument       "i"
+       "network\0"             Optional_argument       "n"
+       "pid\0"                 Optional_argument       "p"
+       "user\0"                Optional_argument       "U"
+       "fork\0"                No_argument             "f"
+       "mount-proc\0"          Optional_argument       "M"
+       "map-root-user\0"       No_argument             "r"
+       "propagation\0"         Required_argument       "P"
+       "setgroups\0"           Required_argument       "s";
+#endif
+
+/*
+ * Translate a propagation mode name ("slave", "private", "shared" or
+ * "unchanged") into the corresponding mount flags. "unchanged" yields 0,
+ * every other mode is combined with MS_REC.
+ *
+ * Reports an error through bb_error_msg_and_die() when the name is not one
+ * of the supported modes.
+ */
+static unsigned long parse_propagation(const char *prop_str)
+{
+       static const struct propagation_mode known_modes[] = {
+               { "slave",     MS_REC | MS_SLAVE   },
+               { "private",   MS_REC | MS_PRIVATE },
+               { "shared",    MS_REC | MS_SHARED  },
+               { "unchanged", 0                   },
+       };
+       const struct propagation_mode *const end =
+                               known_modes + ARRAY_SIZE(known_modes);
+       const struct propagation_mode *mode;
+
+       /* Linear search - the table is tiny. */
+       for (mode = known_modes; mode != end; mode++) {
+               if (!strcmp(mode->name, prop_str))
+                       return mode->flags;
+       }
+
+       bb_error_msg_and_die("unsupported propagation mode: %s", prop_str);
+}
+
+/*
+ * Return the inode number of /proc/<pid>/ns/mnt, i.e. of the file that
+ * represents the mount namespace of process 'pid'. The file is examined
+ * with xstat(); NOTE(review): presumably xstat() terminates the program
+ * when the stat fails - confirm in libbb.
+ */
+static ino_t get_mnt_ns_inode_by_pid(pid_t pid)
+{
+       struct stat st;
+       char mnt_ns_file[PROC_PATH_MAX];
+
+       snprintf(mnt_ns_file, sizeof(mnt_ns_file), "/proc/%d/ns/mnt", pid);
+       xstat(mnt_ns_file, &st);
+
+       return st.st_ino;
+}
+
+/*
+ * Bind-mount the namespace files of process 'pid' onto the paths requested
+ * by the user.
+ *
+ * For each of the 'num_ns' entries in 'ns_list' that has a non-NULL path,
+ * /proc/<pid>/ns/<nsfile> is bind-mounted on that path. Entries without a
+ * path are skipped. The first failing mount is reported through
+ * bb_perror_msg_and_die().
+ */
+static void mount_namespaces(pid_t pid,
+                            struct namespace *ns_list, size_t num_ns)
+{
+       struct namespace *const last = ns_list + num_ns;
+       struct namespace *cur;
+       char src[PROC_PATH_MAX];
+
+       for (cur = ns_list; cur != last; cur++) {
+               if (cur->path) {
+                       snprintf(src, sizeof(src), "/proc/%d/ns/%s",
+                                pid, cur->nsfile);
+
+                       if (mount(src, cur->path, NULL, MS_BIND, NULL) < 0) {
+                               bb_perror_msg_and_die("mount %s on %s failed",
+                                                     src, cur->path);
+                       }
+               }
+       }
+}
+
+/*
+ * Mount a fresh proc filesystem on 'target'.
+ *
+ * The mount point is first switched to recursive private propagation, then
+ * procfs is mounted on it with nosuid, noexec and nodev. If either mount()
+ * call fails the error is reported through bb_perror_msg_and_die(); the
+ * second call is not attempted when the first one fails.
+ */
+static void mount_procfs(const char *target)
+{
+       if (mount("none", target, NULL, MS_PRIVATE | MS_REC, NULL) < 0
+        || mount("proc", target, "proc",
+                 MS_NOSUID | MS_NOEXEC | MS_NODEV, NULL) < 0
+       ) {
+               bb_perror_msg_and_die("mount %s failed", target);
+       }
+}
+
+/*
+ * Entry point of the unshare applet.
+ *
+ * Parses the command line, unshares the requested namespaces and, when
+ * paths were given, bind-mounts the new namespace files on them. It then
+ * optionally forks (-f), maps the current user to root (-r), writes the
+ * setgroups policy (-s), changes the root mount propagation (-m/-P) and
+ * mounts procfs (-M). Finally it executes <program>, or the user's shell
+ * when no program was given.
+ *
+ * Return value: EXIT_SUCCESS in the helper child that mounts the namespace
+ * files, the helper's exit status in the parent if the helper failed, and
+ * the exit status of the forked child when -f is used. All other paths end
+ * in execvp()/run_shell() or in one of the *_and_die() helpers.
+ */
+int unshare_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
+int unshare_main(int argc UNUSED_PARAM, char **argv)
+{
+       static struct namespace ns_list[] = {
+               [NS_MNT_POS] = {
+                       .opt = OPT_mount,
+                       .flag = CLONE_NEWNS,
+                       .nsfile = "mnt",
+               },
+               [NS_UTS_POS] = {
+                       .opt = OPT_uts,
+                       .flag = CLONE_NEWUTS,
+                       .nsfile = "uts",
+               },
+               [NS_IPC_POS] = {
+                       .opt = OPT_ipc,
+                       .flag = CLONE_NEWIPC,
+                       .nsfile = "ipc",
+               },
+               [NS_NET_POS] = {
+                       .opt = OPT_network,
+                       .flag = CLONE_NEWNET,
+                       .nsfile = "net",
+               },
+               [NS_PID_POS] = {
+                       .opt = OPT_pid,
+                       .flag = CLONE_NEWPID,
+                       .nsfile = "pid",
+               },
+               [NS_USR_POS] = {
+                       .opt = OPT_user,
+                       .flag = CLONE_NEWUSER,
+                       .nsfile = "user",
+               },
+       };
+
+       int unsflags = 0, i, need_mount = 0, status, setgrp_allow = 0;
+       const char *proc_mnt_target = "/proc", *prop_str, *setgrp_str;
+       const char *shell;
+       unsigned long prop_flags = MS_REC | MS_PRIVATE;
+       uid_t reuid = geteuid();
+       gid_t regid = getegid();
+       unsigned int opts;
+       pid_t pid = -1;
+
+       IF_FEATURE_UNSHARE_LONG_OPTS(applet_long_options = unshare_longopts);
+
+       /*
+        * The pointer arguments follow the order of the argument-taking
+        * options in opt_str: m, u, i, n, p, U, M, P, s.
+        */
+       opts = getopt32(argv, opt_str,
+                       &ns_list[NS_MNT_POS].path, &ns_list[NS_UTS_POS].path,
+                       &ns_list[NS_IPC_POS].path, &ns_list[NS_NET_POS].path,
+                       &ns_list[NS_PID_POS].path, &ns_list[NS_USR_POS].path,
+                       &proc_mnt_target, &prop_str, &setgrp_str);
+       argv += optind;
+
+       /*
+        * Mounting the proc filesystem before running the program implies
+        * creating a new mount namespace since the /proc mount would
+        * otherwise mess up existing programs on the system.
+        */
+       if (opts & OPT_mount_proc)
+               opts |= OPT_mount;
+
+       /* Mapping user and group IDs to root implies --user. */
+       if (opts & OPT_map_root)
+               opts |= OPT_user;
+
+       if (opts & OPT_setgroups) {
+               if (strcmp(setgrp_str, "allow") == 0) {
+                       setgrp_allow = 1;
+               } else if (strcmp(setgrp_str, "deny") == 0) {
+                       setgrp_allow = 0;
+               } else {
+                       bb_error_msg_and_die(
+                               "unsupported --setgroups argument '%s'",
+                               setgrp_str);
+               }
+       }
+
+       /*
+        * Collect the CLONE_NEW* flags of all requested namespaces and find
+        * out whether any namespace file has to be bind-mounted.
+        */
+       for (i = 0; i < ARRAY_SIZE(ns_list); i++) {
+               struct namespace *ns = &ns_list[i];
+
+               if (opts & ns->opt)
+                       unsflags |= ns->flag;
+
+               if (ns->path)
+                       need_mount = 1;
+       }
+
+       /* Silently ignore --propagation if --mount is not requested. */
+       if ((opts & OPT_propagation) && (opts & OPT_mount))
+               prop_flags = parse_propagation(prop_str);
+
+       /*
+        * Special case: if we were requested to unshare the mount namespace
+        * AND to make any namespace persistent (by bind mounting it) we need
+        * to spawn a child process which will wait for the parent to call
+        * unshare(), then mount parent's namespaces while still in the
+        * previous namespace.
+        */
+       if (need_mount && (opts & OPT_mount)) {
+               ino_t inop, inoc;
+               pid_t ppid;
+
+               /*
+                * Can't use getppid() in child, as we can be unsharing the
+                * pid namespace.
+                */
+               ppid = getpid();
+
+               /*
+                * Save current process' mount namespace file inode number. We
+                * will later use it in child process to check if it already
+                * changed meaning that this process already called unshare().
+                */
+               inop = get_mnt_ns_inode_by_pid(ppid);
+
+               pid = xfork();
+               if (pid == 0) {
+                       /*
+                        * Child - wait until parent calls unshare(). No issue
+                        * in busy-waiting - by the time we get here from
+                        * fork(), the parent has usually already unshared the
+                        * mount namespace. We should spin a few times at most.
+                        *
+                        * XXX Should probably use a pipe to notify the child
+                        * about completing unshare().
+                        */
+                       do {
+                               inoc = get_mnt_ns_inode_by_pid(ppid);
+                       } while (inoc == inop);
+
+                       /* Mount parent's unshared namespaces. */
+                       mount_namespaces(ppid, ns_list, ARRAY_SIZE(ns_list));
+
+                       return EXIT_SUCCESS;
+               } /* Parent continues. */
+       }
+
+       status = unshare(unsflags);
+       if (status < 0)
+               bb_perror_msg_and_die("unshare failed");
+
+       if (need_mount) {
+               /* Wait for the child to finish mounting the namespaces. */
+               if (opts & OPT_mount) {
+                       int exit_status;
+
+                       status = safe_waitpid(pid, &exit_status, 0);
+                       if (status < 0)
+                               bb_perror_msg_and_die("waitpid");
+
+                       /*
+                        * Propagate the helper's failure code. The wait
+                        * status lives in exit_status; 'status' only holds
+                        * the return value of waitpid().
+                        */
+                       if (WIFEXITED(exit_status) &&
+                           WEXITSTATUS(exit_status) != EXIT_SUCCESS)
+                               return WEXITSTATUS(exit_status);
+               } else {
+                       /*
+                        * Regular way - we were requested to mount some other
+                        * namespaces: mount them after the call to unshare().
+                        */
+                       mount_namespaces(getpid(), ns_list,
+                                        ARRAY_SIZE(ns_list));
+               }
+       }
+
+       /*
+        * When we're unsharing the pid namespace, it's not the process that
+        * calls unshare() that is put into the new namespace, but its first
+        * child. The user may want to use this option to spawn a new process
+        * that'll become PID 1 in this new namespace.
+        */
+       if (opts & OPT_fork) {
+               int exit_status;
+
+               pid = xfork();
+               if (pid > 0) {
+                       status = safe_waitpid(pid, &exit_status, 0);
+                       if (status < 0)
+                               bb_perror_msg_and_die("waitpid");
+
+                       /*
+                        * Mirror the child's fate: return its exit code, or
+                        * send ourselves the signal that terminated it.
+                        */
+                       if (WIFEXITED(exit_status))
+                               return WEXITSTATUS(exit_status);
+                       else if (WIFSIGNALED(exit_status))
+                               kill(getpid(), WTERMSIG(exit_status));
+
+                       bb_error_msg_and_die("child exit failed");
+               } /* Child continues. */
+       }
+
+       if (opts & OPT_map_root) {
+               char uidmap_buf[sizeof(unsigned int) * 3 + sizeof(" 0 1")];
+
+               if ((opts & OPT_setgroups) && setgrp_allow) {
+                       bb_error_msg_and_die(
+                               "options --setgroups=allow and --map-root-user are mutually exclusive");
+               }
+
+               /*
+                * Since Linux 3.19 unprivileged writing of /proc/self/gid_map
+                * has been disabled unless /proc/self/setgroups is written
+                * first to permanently disable the ability to call setgroups
+                * in that user namespace.
+                */
+               xopen_xwrite_close(PATH_PROC_SETGROUPS, "deny");
+
+               /*
+                * A map line reads "<ID inside ns> <ID outside ns> <count>".
+                * To appear as root inside the new user namespace, ID 0 has
+                * to be mapped onto the effective ID we had outside of it.
+                * The buffer is bounded by its own size.
+                */
+               snprintf(uidmap_buf, sizeof(uidmap_buf), "0 %u 1",
+                        (unsigned)reuid);
+               xopen_xwrite_close(PATH_PROC_UIDMAP, uidmap_buf);
+               snprintf(uidmap_buf, sizeof(uidmap_buf), "0 %u 1",
+                        (unsigned)regid);
+               xopen_xwrite_close(PATH_PROC_GIDMAP, uidmap_buf);
+       } else if (opts & OPT_setgroups) {
+               xopen_xwrite_close(PATH_PROC_SETGROUPS, setgrp_str);
+       }
+
+       if (opts & OPT_mount) {
+               /* prop_flags defaults to MS_REC | MS_PRIVATE without -P. */
+               status = mount("none", "/", NULL, prop_flags, NULL);
+               if (status < 0) {
+                       bb_perror_msg_and_die(
+                               "cannot change root filesystem propagation");
+               }
+       }
+
+       if (opts & OPT_mount_proc)
+               mount_procfs(proc_mnt_target);
+
+       if (*argv) {
+               execvp(*argv, argv);
+               bb_perror_msg_and_die("failed to execute %s", *argv);
+       }
+
+       /*
+        * No program given: start the user's shell. getenv() returns NULL
+        * when SHELL is unset, so fall back to /bin/sh instead of handing a
+        * NULL pointer to run_shell().
+        * NOTE(review): run_shell() is not visible here; it is assumed not
+        * to accept a NULL shell and not to return - confirm in libbb.
+        */
+       shell = getenv("SHELL");
+       if (!shell || !*shell)
+               shell = "/bin/sh";
+
+       run_shell(shell, 0, NULL, NULL);
+}
-- 
2.1.4

_______________________________________________
busybox mailing list
busybox@busybox.net
http://lists.busybox.net/mailman/listinfo/busybox

Reply via email to