On mån, 2015-03-02 at 11:09 -0600, Eric W. Biederman wrote:
> Alexander Larsson <[email protected]> writes:
> >
> > I am able to do a bind mount of the system one, *if* i pass in MS_REC
> > (which is not necessarily what i want), but I then later fail when
> > trying to remount it read-only.
> 
> MS_REC should be only required if there is something mounted on top of
> one of the files in sysfs.  It sounds like there is, and exposing that
> file would be a permission issue.
> 
> Remount read-only comes in two flavors.  A bind mount remount read-only
> which you should be able to perform as non-root and a remount the
> filesystem read-only for everyone.  I suspect you simply didn't specify
> MS_BIND | MS_RDONLY when attempting to remount sysfs.

I've attached a simple test app that tries to bind mount /sys and
remount it readonly. It fails with EPERM.

The mounts I have on and under /sys are:

15 57 0:15 / /sys rw,nosuid,nodev,noexec,relatime shared:6 - sysfs sysfs 
rw,seclabel
18 15 0:16 / /sys/kernel/security rw,nosuid,nodev,noexec,relatime shared:7 - 
securityfs securityfs rw
22 15 0:19 / /sys/fs/cgroup ro,nosuid,nodev,noexec shared:8 - tmpfs tmpfs 
ro,seclabel,mode=755
23 22 0:20 / /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime shared:9 - 
cgroup cgroup 
rw,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd
24 15 0:21 / /sys/fs/pstore rw,nosuid,nodev,noexec,relatime shared:19 - pstore 
pstore rw
25 22 0:22 / /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime shared:10 - 
cgroup cgroup rw,cpuset
26 22 0:23 / /sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime 
shared:11 - cgroup cgroup rw,cpu,cpuacct
27 22 0:24 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:12 - 
cgroup cgroup rw,memory
28 22 0:25 / /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime shared:13 - 
cgroup cgroup rw,devices
29 22 0:26 / /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime shared:14 - 
cgroup cgroup rw,freezer
30 22 0:27 / /sys/fs/cgroup/net_cls,net_prio rw,nosuid,nodev,noexec,relatime 
shared:15 - cgroup cgroup rw,net_cls,net_prio
31 22 0:28 / /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime shared:16 - 
cgroup cgroup rw,blkio
32 22 0:29 / /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime 
shared:17 - cgroup cgroup rw,perf_event
33 22 0:30 / /sys/fs/cgroup/hugetlb rw,nosuid,nodev,noexec,relatime shared:18 - 
cgroup cgroup rw,hugetlb
54 15 0:31 / /sys/kernel/config rw,relatime shared:20 - configfs configfs rw
34 15 0:14 / /sys/fs/selinux rw,relatime shared:21 - selinuxfs selinuxfs rw
38 15 0:6 / /sys/kernel/debug rw,relatime shared:26 - debugfs debugfs rw
202 15 0:48 / /sys/fs/fuse/connections rw,relatime shared:147 - fusectl fusectl 
rw

Also, I'd like to make all the recursively bound subtrees read-only. Is
there a better way to do this than enumerating all mounts and remounting
every one that is under /sys?

In fact, this is a general problem I have with recursive bind mounts. If
I want to grant access to some directory with limited access (for
example read-only or nosuid), then I have to use a recursive bind mount,
but the remount is not recursive and, furthermore, it does not apply to
later mounts that get propagated into my namespace.

-- 
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 Alexander Larsson                                            Red Hat, Inc 
       [email protected]            [email protected] 
He's a world-famous flyboy werewolf with a passion for fast cars. She's 
an enchanted junkie vampire from aristocratic European stock. They fight 
crime! 
#define _GNU_SOURCE /* Required for CLONE_NEWNS */
#include <assert.h>
#include <arpa/inet.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <linux/loop.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <net/if.h>
#include <netinet/in.h>
#include <sched.h>
#include <signal.h>
#include <poll.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mount.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/eventfd.h>
#include <sys/signalfd.h>
#include <sys/capability.h>
#include <sys/prctl.h>
#include <unistd.h>

/* Report a fatal error and terminate.
 *
 * Prints the printf-style message built from FORMAT and its arguments to
 * stderr, appends ": " plus the strerror() text for the errno value that
 * was current when the function was entered, then exits with status 1.
 * Does not return. */
static void
die_with_error (const char *format, ...)
{
  /* Capture errno before any stdio call gets a chance to change it. */
  const int saved_errno = errno;
  va_list ap;

  va_start (ap, format);
  vfprintf (stderr, format, ap);
  va_end (ap);

  fprintf (stderr, ": %s\n", strerror (saved_errno));

  exit (1);
}

/* Report a fatal error that has no associated errno and terminate.
 *
 * Prints the printf-style message built from FORMAT and its arguments to
 * stderr, followed by a newline, then exits with status 1.
 * Does not return. */
static void
die (const char *format, ...)
{
  va_list ap;

  va_start (ap, format);
  vfprintf (stderr, format, ap);
  va_end (ap);

  fputc ('\n', stderr);

  exit (1);
}

/* Format a string into freshly allocated memory.
 *
 * FORMAT and the following arguments are interpreted as for printf().
 * Returns the formatted, NUL-terminated string; the caller owns it and
 * releases it with free().  Never returns NULL: if formatting or
 * allocation fails the process is terminated through die(). */
static char*
strdup_printf (const char *format,
               ...)
{
  char *buffer = NULL;
  va_list args;
  int len;

  va_start (args, format);
  len = vasprintf (&buffer, format, args);
  va_end (args);

  /* vasprintf() signals failure through its return value; the pointer it
   * was given is not guaranteed to be left NULL in that case, so the
   * return value is the only reliable thing to test. */
  if (len < 0)
    die ("oom");

  return buffer;
}

/* Invoke the clone system call directly through syscall(), passing only
 * the flags and the child stack pointer.
 *
 * Returns whatever syscall() returns, narrowed to int. */
static inline int raw_clone(unsigned long flags, void *child_stack) {
#if !defined(__s390__) && !defined(__CRIS__)
        /* Common case: flags first, then the child stack. */
        return (int) syscall(__NR_clone, flags, child_stack);
#else
        /* s390 and cris take the first two arguments of the raw clone()
         * system call in the opposite order: stack first, then flags. */
        return (int) syscall(__NR_clone, child_stack, flags);
#endif
}

/* Write the whole NUL-terminated string CONTENT to the descriptor FD.
 *
 * Short writes are continued from where they stopped and writes
 * interrupted by a signal (EINTR) are retried.
 * Returns 0 once every byte has been written, or -1 if write() fails for
 * any other reason or reports that it wrote nothing. */
static int
write_to_file (int fd, const char *content)
{
  const char *cursor = content;
  ssize_t remaining = strlen (content);

  while (remaining > 0)
    {
      ssize_t written = write (fd, cursor, remaining);

      if (written > 0)
        {
          /* Partial or complete write: advance past what went out. */
          cursor += written;
          remaining -= written;
        }
      else if (written == 0 || errno != EINTR)
        {
          /* Zero-byte write or a real error: give up. */
          return -1;
        }
      /* Otherwise the call was interrupted; go round again. */
    }

  return 0;
}

/* Open the existing file PATH and write CONTENT to it.
 *
 * The file is opened read-write with O_CLOEXEC and without O_CREAT, so
 * it must already exist.  CONTENT may be NULL, in which case the file is
 * only opened and closed again.
 * Returns 0 on success, or -1 if the open or the write failed.  On
 * failure errno still describes the failing open/write when the function
 * returns, so callers can report it. */
static int
write_file (const char *path, const char *content)
{
  int fd;
  int res = 0;
  int errsv;

  fd = open (path, O_RDWR | O_CLOEXEC, 0);
  if (fd == -1)
    return -1;

  if (content)
    res = write_to_file (fd, content);

  /* close() may itself fail and set errno; keep the value left by the
   * write so the caller's error message names the real problem. */
  errsv = errno;
  close (fd);
  errno = errsv;

  return res;
}

/* Test program: in a child created with new user and mount namespaces,
 * map the caller's uid/gid, mount a tmpfs on /tmp/foo, recursively bind
 * mount /sys onto /tmp/foo/sys, try to remount that bind mount read-only,
 * and finally exec /bin/sh so the result can be inspected.
 *
 * Failures of the gid_map write, the bind mount and the remount are only
 * reported on stderr; the program carries on so the shell is still
 * started.  Every other failure is fatal.
 *
 * The parent waits for the child and exits with the child's status. */
int
main (int argc,
      char **argv)
{
  char *args[] = { "/bin/sh", NULL };
  pid_t pid;
  char *uid_map, *gid_map;
  uid_t uid;
  gid_t gid;

  (void) argc;
  (void) argv;

  /* Sampled before the clone; these are the ids written to the maps. */
  uid = getuid();
  gid = getgid();

  pid = raw_clone (SIGCHLD | CLONE_NEWUSER | CLONE_NEWNS,
                   NULL);
  if (pid == -1)
    die_with_error ("Creating new namespace failed");

  if (pid != 0)
    {
      /* Parent: wait for the child (which becomes the shell) and hand
       * its exit status on to our own caller. */
      int status;

      while (waitpid (pid, &status, 0) == -1)
        {
          if (errno != EINTR)
            die_with_error ("waiting for child");
        }

      if (WIFEXITED (status))
        exit (WEXITSTATUS (status));
      if (WIFSIGNALED (status))
        exit (128 + WTERMSIG (status));
      exit (1);
    }

  /* Child from here on. */
  if (write_file("/proc/self/setgroups", "deny\n") < 0)
    die_with_error ("error writing to setgroups");

  /* Identity-map a single id: "<id> <id> 1". */
  uid_map = strdup_printf ("%u %u 1\n", (unsigned int) uid, (unsigned int) uid);
  if (write_file ("/proc/self/uid_map", uid_map) < 0)
    die_with_error ("setting up uid map");
  free (uid_map);

  gid_map = strdup_printf ("%u %u 1\n", (unsigned int) gid, (unsigned int) gid);
  if (write_file ("/proc/self/gid_map", gid_map) < 0)
    {
      /* Reported but not fatal. */
      int errsv = errno;
      fprintf (stderr, "error writing to gid_map: %s, content: %s", strerror (errsv), gid_map);
    }
  free (gid_map);

  if (mkdir ("/tmp/foo", 0755) && errno != EEXIST)
    die_with_error ("unable to create tmp");

  if (mount ("", "/tmp/foo", "tmpfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL) != 0)
    die_with_error ("Failed to mount tmpfs");

  if (mkdir ("/tmp/foo/sys", 0755))
    die_with_error ("unable to mkdir sys");

  /* Recursive bind mount of /sys and everything mounted beneath it. */
  if (mount ("/sys", "/tmp/foo/sys", NULL, MS_MGC_VAL|MS_BIND|MS_REC, NULL) != 0)
    {
      int errsv = errno;
      fprintf (stderr, "error bind mounting sys: %s\n", strerror (errsv));
    }

  /* Bind-remount of the new mount with read-only/nodev/nosuid flags;
   * this is the step under test. */
  if (mount ("none", "/tmp/foo/sys", NULL, MS_MGC_VAL|MS_BIND|MS_REMOUNT|MS_NODEV|MS_NOSUID|MS_RDONLY, NULL) != 0)
    {
      int errsv = errno;
      fprintf (stderr, "error remounting sys: %s\n", strerror (errsv));
    }

  /* execv() only returns if it failed. */
  execv (args[0], args);
  die_with_error ("execv %s", args[0]);

  return 1;
}
_______________________________________________
gnome-os-list mailing list
[email protected]
https://mail.gnome.org/mailman/listinfo/gnome-os-list

Reply via email to