From: Serge E. Hallyn <[EMAIL PROTECTED]>
Subject: [PATCH -mm 6/8] user ns: implement shared mounts

Implement shared-ns mounts, which allow containers in different user
namespaces to share mounts.  Without this, containers can obviously
never even be started.

Here is a sample smount.c (based on Miklos' version). It only
bind-mounts arg1 onto arg2, marking the destination as a
shared-ns mount.

/*
 * smount: bind-mount argv[1] onto argv[2] with MS_SHARE_NS set.
 *
 * Echoes its arguments to stdout, sets the fsuid to the caller's real
 * uid, then calls mount(2) with MS_SHARE_NS | MS_BIND.
 *
 * Returns 0 on success, 1 on a usage error or when mount() fails.
 */
int main(int argc, char *argv[])
{
        int type;

        if (argc != 3) {
                /* End the line so the message is not glued to the prompt. */
                fprintf(stderr, "usage: %s src dest\n", argv[0]);
                return 1;
        }

        fprintf(stdout, "%s %s %s\n", argv[0], argv[1], argv[2]);

        /* Bind mount whose destination is flagged as a shared-ns mount. */
        type = MS_SHARE_NS | MS_BIND;
        setfsuid(getuid());

        if (mount(argv[1], argv[2], "none", type, "") == -1) {
                perror("mount");
                return 1;
        }
        return 0;
}

Signed-off-by: Serge E. Hallyn <[EMAIL PROTECTED]>
---
 fs/namespace.c        |   30 ++++++++++++++++++++++++------
 fs/pnode.h            |    1 +
 include/linux/fs.h    |    1 +
 include/linux/mount.h |    1 +
 include/linux/sched.h |    2 ++
 5 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index a4039a3..60ca9b5 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -234,7 +234,14 @@ static struct vfsmount *clone_mnt(struct
                                        int flag)
 {
        struct super_block *sb = old->mnt_sb;
-       struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname);
+       struct vfsmount *mnt;
+
+       if (!(old->mnt_flags & MNT_SHARE_NS)) {
+               if (old->mnt_user_ns != current->nsproxy->user_ns)
+                       return ERR_PTR(-EPERM);
+       }
+
+       mnt = alloc_vfsmnt(old->mnt_devname);
 
        if (mnt) {
                mnt->mnt_flags = old->mnt_flags;
@@ -257,6 +264,10 @@ static struct vfsmount *clone_mnt(struct
                }
                if (flag & CL_MAKE_SHARED)
                        set_mnt_shared(mnt);
+               if (flag & CL_SHARE_NS)
+                       mnt->mnt_flags |= MNT_SHARE_NS;
+               else
+                       mnt->mnt_flags &= ~MNT_SHARE_NS;
 
                /* stick the duplicate mount on the same expiry list
                 * as the original if that was on one */
@@ -368,6 +379,7 @@ static int show_vfsmnt(struct seq_file *
                { MNT_NOSUID, ",nosuid" },
                { MNT_NODEV, ",nodev" },
                { MNT_NOEXEC, ",noexec" },
+               { MNT_SHARE_NS, ",share_userns" },
                { MNT_NOATIME, ",noatime" },
                { MNT_NODIRATIME, ",nodiratime" },
                { MNT_RELATIME, ",relatime" },
@@ -902,11 +914,14 @@ static int do_change_type(struct nameida
 /*
  * do loopback mount.
  */
-static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
+static int do_loopback(struct nameidata *nd, char *old_name, int recurse,
+                                                        int uidns_share)
 {
        struct nameidata old_nd;
        struct vfsmount *mnt = NULL;
        int err = mount_is_safe(nd);
+       int flag = (uidns_share ? CL_SHARE_NS : 0);
+
        if (err)
                return err;
        if (!old_name || !*old_name)
@@ -925,9 +940,9 @@ static int do_loopback(struct nameidata 
 
        err = -ENOMEM;
        if (recurse)
-               mnt = copy_tree(old_nd.mnt, old_nd.dentry, 0);
+               mnt = copy_tree(old_nd.mnt, old_nd.dentry, flag);
        else
-               mnt = clone_mnt(old_nd.mnt, old_nd.dentry, 0);
+               mnt = clone_mnt(old_nd.mnt, old_nd.dentry, flag);
 
        if (!mnt || IS_ERR(mnt)) {
                err = mnt ? PTR_ERR(mnt) : -ENOMEM;
@@ -1414,9 +1429,11 @@ long do_mount(char *dev_name, char *dir_
                mnt_flags |= MNT_NODIRATIME;
        if (flags & MS_RELATIME)
                mnt_flags |= MNT_RELATIME;
+       if (flags & MS_SHARE_NS)
+               mnt_flags |= MNT_SHARE_NS;
 
        flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
-                  MS_NOATIME | MS_NODIRATIME | MS_RELATIME);
+                  MS_NOATIME | MS_NODIRATIME | MS_RELATIME | MS_SHARE_NS);
 
        /* ... and get the mountpoint */
        retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
@@ -1431,7 +1448,8 @@ long do_mount(char *dev_name, char *dir_
                retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
                                    data_page);
        else if (flags & MS_BIND)
-               retval = do_loopback(&nd, dev_name, flags & MS_REC);
+               retval = do_loopback(&nd, dev_name, flags & MS_REC,
+                                                mnt_flags & MNT_SHARE_NS);
        else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
                retval = do_change_type(&nd, flags);
        else if (flags & MS_MOVE)
diff --git a/fs/pnode.h b/fs/pnode.h
index d45bd8e..eb62f4c 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -22,6 +22,7 @@ #define CL_SLAVE              0x02
 #define CL_COPY_ALL            0x04
 #define CL_MAKE_SHARED                 0x08
 #define CL_PROPAGATION                 0x10
+#define CL_SHARE_NS            0x20
 
 static inline void set_mnt_shared(struct vfsmount *mnt)
 {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index bed19b7..569a637 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -121,6 +121,7 @@ #define MS_PRIVATE  (1<<18) /* change to 
 #define MS_SLAVE       (1<<19) /* change to slave */
 #define MS_SHARED      (1<<20) /* change to shared */
 #define MS_RELATIME    (1<<21) /* Update atime relative to mtime/ctime. */
+#define MS_SHARE_NS    (1<<22) /* ignore user namespaces for permission */
 #define MS_ACTIVE      (1<<30)
 #define MS_NOUSER      (1<<31)
 
diff --git a/include/linux/mount.h b/include/linux/mount.h
index e438195..00e5066 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -35,6 +35,7 @@ #define MNT_SHRINKABLE        0x100
 #define MNT_SHARED     0x1000  /* if the vfsmount is a shared mount */
 #define MNT_UNBINDABLE 0x2000  /* if the vfsmount is a unbindable mount */
 #define MNT_PNODE_MASK 0x3000  /* propogation flag mask */
+#define MNT_SHARE_NS   0x4000  /* ignore user namespaces for permission */
 
 struct vfsmount {
        struct list_head mnt_hash;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 450fc39..73df38c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1599,6 +1599,8 @@ static inline int task_mnt_same_uidns(st
 {
        if (tsk->nsproxy == init_task.nsproxy)
                return 1;
+       if (mnt->mnt_flags & MNT_SHARE_NS)
+               return 1;
        if (mnt->mnt_user_ns == tsk->nsproxy->user_ns)
                return 1;
        return 0;
-- 
1.4.1

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to