On mån, 2015-03-02 at 11:09 -0600, Eric W. Biederman wrote:
> Alexander Larsson <[email protected]> writes:
> >
> > I am able to do a bind mount of the system one, *if* i pass in MS_REC
> > (which is not necessarily what i want), but I then later fail when
> > trying to remount it read-only.
>
> MS_REC should be only required if there is something mounted on top of
> one of the files in sysfs. It sounds like there is, and exposing that
> file would be a permission issue.
>
> Remount read-only comes in two flavors. A bind mount remount read-only
> which you should be able to perform as non-root and a remount the
> filesystem read-only for everyone. I suspect you simply didn't specify
> MS_BIND | MS_RDONLY when attempting to remount sysfs.
I've attached a simple test app that tries to bind mount /sys and
remount it read-only. It fails with EPERM.
The mounts I have over /sys are:
15 57 0:15 / /sys rw,nosuid,nodev,noexec,relatime shared:6 - sysfs sysfs
rw,seclabel
18 15 0:16 / /sys/kernel/security rw,nosuid,nodev,noexec,relatime shared:7 -
securityfs securityfs rw
22 15 0:19 / /sys/fs/cgroup ro,nosuid,nodev,noexec shared:8 - tmpfs tmpfs
ro,seclabel,mode=755
23 22 0:20 / /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime shared:9 -
cgroup cgroup
rw,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd
24 15 0:21 / /sys/fs/pstore rw,nosuid,nodev,noexec,relatime shared:19 - pstore
pstore rw
25 22 0:22 / /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime shared:10 -
cgroup cgroup rw,cpuset
26 22 0:23 / /sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime
shared:11 - cgroup cgroup rw,cpu,cpuacct
27 22 0:24 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:12 -
cgroup cgroup rw,memory
28 22 0:25 / /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime shared:13 -
cgroup cgroup rw,devices
29 22 0:26 / /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime shared:14 -
cgroup cgroup rw,freezer
30 22 0:27 / /sys/fs/cgroup/net_cls,net_prio rw,nosuid,nodev,noexec,relatime
shared:15 - cgroup cgroup rw,net_cls,net_prio
31 22 0:28 / /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime shared:16 -
cgroup cgroup rw,blkio
32 22 0:29 / /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime
shared:17 - cgroup cgroup rw,perf_event
33 22 0:30 / /sys/fs/cgroup/hugetlb rw,nosuid,nodev,noexec,relatime shared:18 -
cgroup cgroup rw,hugetlb
54 15 0:31 / /sys/kernel/config rw,relatime shared:20 - configfs configfs rw
34 15 0:14 / /sys/fs/selinux rw,relatime shared:21 - selinuxfs selinuxfs rw
38 15 0:6 / /sys/kernel/debug rw,relatime shared:26 - debugfs debugfs rw
202 15 0:48 / /sys/fs/fuse/connections rw,relatime shared:147 - fusectl fusectl
rw
Also, I'd like to make all the recursively bound subtrees read-only. Is
there a better way to do this than enumerating all mounts and remounting
all that are under /sys?
In fact this is a general problem I have with recursive bind mounts. If
I want to grant access to some directory with limited access (for
example read-only or nosuid) then I have to use a recursive bind mount,
but the remount is not recursive, and furthermore, it does not apply to
later mounts that get propagated into my namespace.
--
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
Alexander Larsson Red Hat, Inc
[email protected] [email protected]
He's a world-famous flyboy werewolf with a passion for fast cars. She's
an enchanted junkie vampire from aristocratic European stock. They fight
crime!
#define _GNU_SOURCE /* Required for CLONE_NEWNS */
#include <assert.h>
#include <arpa/inet.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <linux/loop.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <net/if.h>
#include <netinet/in.h>
#include <sched.h>
#include <signal.h>
#include <poll.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mount.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/eventfd.h>
#include <sys/signalfd.h>
#include <sys/capability.h>
#include <sys/prctl.h>
#include <unistd.h>
/* Report a fatal error and terminate.
 *
 * Writes the printf-style message described by FORMAT to stderr, appends
 * ": " plus the strerror() text for the errno value observed on entry,
 * and exits with status 1.  Does not return. */
static void
die_with_error (const char *format, ...)
{
  /* Grab errno before calling anything else so the reported error is the
   * one the caller just hit. */
  const int saved_errno = errno;
  va_list ap;

  va_start (ap, format);
  vfprintf (stderr, format, ap);
  va_end (ap);

  fprintf (stderr, ": %s\n", strerror (saved_errno));
  exit (1);
}
/* Report a fatal error that has no associated errno and terminate.
 *
 * Writes the printf-style message described by FORMAT to stderr followed
 * by a newline, then exits with status 1.  Does not return. */
static void
die (const char *format, ...)
{
  va_list ap;

  va_start (ap, format);
  vfprintf (stderr, format, ap);
  va_end (ap);

  fputs ("\n", stderr);
  exit (1);
}
/* Format a string into a newly allocated buffer.
 *
 * FORMAT and the variadic arguments follow printf conventions.  Returns a
 * heap-allocated, NUL-terminated string that the caller releases with
 * free().  Never returns NULL: on failure the program is terminated via
 * die ("oom"). */
static char*
strdup_printf (const char *format,
               ...)
{
  char *buffer = NULL;
  va_list args;
  int len;

  va_start (args, format);
  len = vasprintf (&buffer, format, args);
  va_end (args);

  /* vasprintf() signals failure through its return value; the pointer it
   * was handed is not guaranteed to be left NULL in that case, so the
   * return value must be checked as well. */
  if (len < 0 || buffer == NULL)
    die ("oom");

  return buffer;
}
/* Invoke the clone system call directly, bypassing the libc wrapper.
 *
 * FLAGS and CHILD_STACK are handed to the kernel as-is.  Returns whatever
 * syscall() returns, narrowed to int. */
static inline int raw_clone(unsigned long flags, void *child_stack) {
  long ret;

#if defined(__s390__) || defined(__CRIS__)
  /* s390 and cris take the stack pointer first and the flags second,
   * the reverse of every other architecture. */
  ret = syscall(__NR_clone, child_stack, flags);
#else
  ret = syscall(__NR_clone, flags, child_stack);
#endif

  return (int) ret;
}
/* Write the whole NUL-terminated string CONTENT to descriptor FD.
 *
 * Keeps calling write() until every byte has been written, resuming after
 * short writes and retrying when a call is interrupted (EINTR).
 * Returns 0 on success and -1 if write() fails for any other reason or
 * reports that it wrote nothing. */
static int
write_to_file (int fd, const char *content)
{
  ssize_t remaining = strlen (content);

  while (remaining > 0)
    {
      ssize_t written = write (fd, content, remaining);

      if (written > 0)
        {
          /* Partial or full progress: skip past what was written. */
          content += written;
          remaining -= written;
          continue;
        }

      /* Interrupted before anything was written: just try again. */
      if (written < 0 && errno == EINTR)
        continue;

      return -1;
    }

  return 0;
}
/* Open the existing file PATH and write CONTENT to it.
 *
 * PATH is opened read-write with O_CLOEXEC; it is not created if missing.
 * CONTENT may be NULL, in which case the file is only opened and closed.
 * Returns 0 on success and -1 if the open or the write fails.  On failure
 * errno describes the open/write error: it is preserved across the
 * close() so callers can report it. */
static int
write_file (const char *path, const char *content)
{
  int fd;
  int res = 0;
  int errsv;

  fd = open (path, O_RDWR | O_CLOEXEC, 0);
  if (fd == -1)
    return -1;

  if (content)
    res = write_to_file (fd, content);

  /* close() is allowed to modify errno; keep the value from the write so
   * the caller's error message names the real failure. */
  errsv = errno;
  close (fd);
  errno = errsv;

  return res;
}
/* Test program: inside a new user + mount namespace, mount a tmpfs on
 * /tmp/foo, recursively bind mount /sys onto /tmp/foo/sys, remount that
 * bind mount read-only, and finally exec /bin/sh so the result can be
 * inspected interactively.
 *
 * The parent process only waits for the child and then exits with 0.
 * Command-line arguments are ignored. */
int
main (int argc,
      char **argv)
{
  char *args[] = { "/bin/sh", NULL };
  pid_t pid;
  char *uid_map, *gid_map;
  int uid, gid;

  (void) argc;
  (void) argv;

  /* Read the ids before cloning; they are used below to build the
   * identity mappings written to uid_map and gid_map. */
  uid = getuid ();
  gid = getgid ();

  pid = raw_clone (SIGCHLD | CLONE_NEWUSER | CLONE_NEWNS,
                   NULL);
  if (pid == -1)
    die_with_error ("Creating new namespace failed");

  if (pid != 0)
    {
      /* Parent: block until the child (and the shell it execs) is done,
       * then exit.  The child's exit status is not propagated. */
      int status;
      wait (&status);
      exit (0);
    }

  /* Child: everything below runs inside the new namespaces. */

  /* "deny" is written to setgroups before gid_map is written. */
  if (write_file ("/proc/self/setgroups", "deny\n") < 0)
    die_with_error ("error writing to setgroups");

  /* Map our own uid and gid onto themselves (one id each). */
  uid_map = strdup_printf ("%d %d 1\n", uid, uid);
  if (write_file ("/proc/self/uid_map", uid_map) < 0)
    die_with_error ("setting up uid map");
  free (uid_map);

  gid_map = strdup_printf ("%d %d 1\n", gid, gid);
  if (write_file ("/proc/self/gid_map", gid_map) < 0)
    {
      /* Reported but not fatal; gid_map already ends in a newline. */
      int errsv = errno;
      fprintf (stderr, "error writing to gid_map: %s, content: %s", strerror (errsv), gid_map);
    }
  free (gid_map);

  if (mkdir ("/tmp/foo", 0755) && errno != EEXIST)
    die_with_error ("unable to create tmp");

  if (mount ("", "/tmp/foo", "tmpfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL) != 0)
    die_with_error ("Failed to mount tmpfs");

  if (mkdir ("/tmp/foo/sys", 0755))
    die_with_error ("unable to mkdir sys");

  /* Mount failures from here on are reported but do not abort, so the
   * shell still starts and the resulting state can be examined. */
  if (mount ("/sys", "/tmp/foo/sys", NULL, MS_MGC_VAL|MS_BIND|MS_REC, NULL) != 0)
    {
      int errsv = errno;
      fprintf (stderr, "error bind mounting sys: %s\n", strerror (errsv));
    }

  /* The nosuid/nodev/noexec flags a mount carries when it is inherited
   * into a less privileged mount namespace are locked: a bind remount
   * that would clear any of them is refused with EPERM.  /sys is normally
   * mounted nosuid,nodev,noexec, so all three must be repeated here;
   * leaving out MS_NOEXEC is what made this remount fail. */
  if (mount ("none", "/tmp/foo/sys", NULL, MS_MGC_VAL|MS_BIND|MS_REMOUNT|MS_NODEV|MS_NOSUID|MS_NOEXEC|MS_RDONLY, NULL) != 0)
    {
      int errsv = errno;
      fprintf (stderr, "error remounting sys: %s\n", strerror (errsv));
    }

  if (execv ("/bin/sh", args) == -1)
    die_with_error ("execv %s", args[0]);

  /* execv() only returns on failure, which is handled above. */
  printf ("end??\n");
  return 1;
}
_______________________________________________
gnome-os-list mailing list
[email protected]
https://mail.gnome.org/mailman/listinfo/gnome-os-list