From: Bharata B Rao <[EMAIL PROTECTED]>
Subject: Mount changes to support union mount.

Adds union mount support.

This patch adds a new mount type for union mount (MNT_UNION) and changes
the mount path to build a union stack during mount. The routines for
supporting the creation, traversal and destruction of union stacks are
also included here.

Signed-off-by: Bharata B Rao <[EMAIL PROTECTED]>
---
 fs/namespace.c        |  164 ++++++++++++++++++++++++++++++++++++++++++++++----
 include/linux/fs.h    |    1 
 include/linux/mount.h |   17 +++++
 3 files changed, 172 insertions(+), 10 deletions(-)

--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -35,6 +35,7 @@ __cacheline_aligned_in_smp DEFINE_SPINLO
 static int event;
 
 static struct list_head *mount_hashtable __read_mostly;
+static struct list_head *union_mount_hashtable;
 static int hash_mask __read_mostly, hash_bits __read_mostly;
 static struct kmem_cache *mnt_cache __read_mostly;
 static struct rw_semaphore namespace_sem;
@@ -54,6 +55,89 @@ static inline unsigned long hash(struct 
        return tmp & hash_mask;
 }
 
+/* Find the union_mount for (@mnt, @dentry). Needs vfsmount_lock held. */
+static struct union_mount *find_union_mount(struct vfsmount *mnt,
+               struct dentry *dentry)
+{
+       struct union_mount *um;
+       struct list_head *bucket;
+
+       /* Non-union mounts are not looked up in the hash table at all. */
+       if (!IS_MNT_UNION(mnt))
+               return NULL;
+       bucket = union_mount_hashtable + hash(mnt, dentry);
+       list_for_each_entry(um, bucket, hash)
+               if (um->src_mnt == mnt && um->src_dentry == dentry)
+                       return um;
+       return NULL;
+}
+
+/*
+ * Allocates a union_mount (NULL on failure), taking refs on all but @src_mnt.
+ * GFP_ATOMIC since mount propagation calls this under vfsmount_lock.
+ * TODO: Can we use a separate kmem cache for union_mount ?
+ */
+struct union_mount *alloc_union_mount(struct vfsmount *src_mnt,
+       struct dentry *src_dentry, struct vfsmount *dst_mnt,
+       struct dentry *dst_dentry)
+{
+       struct union_mount *um = kmalloc(sizeof(*um), GFP_ATOMIC);
+
+       if (!um)
+               return NULL;
+       um->src_mnt = src_mnt;
+       um->src_dentry = dget(src_dentry);
+       um->dst_mnt = mntget(dst_mnt);
+       um->dst_dentry = dget(dst_dentry);
+       INIT_LIST_HEAD(&um->hash);
+       INIT_LIST_HEAD(&um->list);
+       return um;
+}
+
+/* Hash @u and add it to its source's mnt_union list. Needs vfsmount_lock. */
+void attach_mnt_union(struct union_mount *u)
+{
+       /* A NULL @u is accepted and ignored. */
+       if (u) {
+               list_add_tail(&u->list, &u->src_mnt->mnt_union);
+               list_add_tail(&u->hash, union_mount_hashtable +
+                               hash(u->src_mnt, u->src_dentry));
+       }
+}
+
+/*
+ * Finds the next (vfsmount, dentry) in the union stack. If found, stores it
+ * in @nd and returns 1. Else leaves @nd alone and returns 0.
+ */
+int next_union_mount(struct nameidata *nd)
+{
+       struct union_mount *u;
+
+       spin_lock(&vfsmount_lock);
+       u = find_union_mount(nd->mnt, nd->dentry);
+       if (u) {
+               /* Copy out under the lock; @u must not be used after unlock. */
+               nd->mnt = u->dst_mnt;
+               nd->dentry = u->dst_dentry;
+       }
+       spin_unlock(&vfsmount_lock);
+       return u != NULL;
+}
+
+/*
+ * Returns 1 if the union stack has a next element for (@mnt, @dentry),
+ * 0 otherwise. Takes vfsmount_lock around the lookup.
+ */
+int next_union_mount_exists(struct vfsmount *mnt, struct dentry *dentry)
+{
+       int found;
+
+       spin_lock(&vfsmount_lock);
+       found = find_union_mount(mnt, dentry) != NULL;
+       spin_unlock(&vfsmount_lock);
+       return found;
+}
+
 struct vfsmount *alloc_vfsmnt(const char *name)
 {
        struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
@@ -67,6 +151,7 @@ struct vfsmount *alloc_vfsmnt(const char
                INIT_LIST_HEAD(&mnt->mnt_share);
                INIT_LIST_HEAD(&mnt->mnt_slave_list);
                INIT_LIST_HEAD(&mnt->mnt_slave);
+               INIT_LIST_HEAD(&mnt->mnt_union);
                if (name) {
                        int size = strlen(name) + 1;
                        char *newname = kmalloc(size, GFP_KERNEL);
@@ -173,18 +258,20 @@ void mnt_set_mountpoint(struct vfsmount 
        dentry->d_mounted++;
 }
 
-static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd)
+static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd,
+               struct union_mount *u)
 {
        mnt_set_mountpoint(nd->mnt, nd->dentry, mnt);
        list_add_tail(&mnt->mnt_hash, mount_hashtable +
                        hash(nd->mnt, nd->dentry));
        list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts);
+       attach_mnt_union(u);
 }
 
 /*
  * the caller must hold vfsmount_lock
  */
-static void commit_tree(struct vfsmount *mnt)
+static void commit_tree(struct vfsmount *mnt, struct union_mount *u)
 {
        struct vfsmount *parent = mnt->mnt_parent;
        struct vfsmount *m;
@@ -201,6 +288,7 @@ static void commit_tree(struct vfsmount 
        list_add_tail(&mnt->mnt_hash, mount_hashtable +
                                hash(parent, mnt->mnt_mountpoint));
        list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+       attach_mnt_union(u);
        touch_mnt_namespace(n);
 }
 
@@ -342,8 +430,18 @@ static struct vfsmount *clone_mnt(struct
 static inline void __mntput(struct vfsmount *mnt)
 {
        struct super_block *sb = mnt->mnt_sb;
+       struct union_mount *u, *next;
+
        dput(mnt->mnt_root);
        clear_mnt_user(mnt);
+
+       list_for_each_entry_safe(u, next, &mnt->mnt_union, list) {
+               list_del_init(&u->list);
+               dput(u->src_dentry);
+               mntput(u->dst_mnt);
+               dput(u->dst_dentry);
+               kfree(u);
+       }
        free_vfsmnt(mnt);
        deactivate_super(sb);
 }
@@ -352,6 +450,17 @@ void mntput_no_expire(struct vfsmount *m
 {
 repeat:
        if (atomic_dec_and_lock(&mnt->mnt_count, &vfsmount_lock)) {
+               struct union_mount *u;
+
+               /*
+                * Remove all union_mounts under this mnt from the
+                * union_mount_hashtable. This needs to be done with
+                * vfsmount_lock held. The rest of the cleanup is done
+                * outside of the lock.
+                */
+               list_for_each_entry(u, &mnt->mnt_union, list)
+                       list_del_init(&u->hash);
+
                if (likely(!mnt->mnt_pinned)) {
                        spin_unlock(&vfsmount_lock);
                        __mntput(mnt);
@@ -436,6 +545,7 @@ static int show_vfsmnt(struct seq_file *
                { MNT_NODIRATIME, ",nodiratime" },
                { MNT_RELATIME, ",relatime" },
                { MNT_NOMNT, ",nomnt" },
+               { MNT_UNION, ",union" },
                { 0, NULL }
        };
        struct proc_fs_info *fs_infop;
@@ -839,7 +949,11 @@ struct vfsmount *copy_tree(struct vfsmou
                                goto error;
                        spin_lock(&vfsmount_lock);
                        list_add_tail(&q->mnt_list, &res->mnt_list);
-                       attach_mnt(q, &nd);
+                       /*
+                        * TODO: Understand and pass appropriate union_mount
+                        * argument here.
+                        */
+                       attach_mnt(q, &nd, NULL);
                        spin_unlock(&vfsmount_lock);
                }
        }
@@ -925,10 +1039,16 @@ static int attach_recursive_mnt(struct v
        struct vfsmount *dest_mnt = nd->mnt;
        struct dentry *dest_dentry = nd->dentry;
        struct vfsmount *child, *p;
+       struct union_mount *u = NULL;
 
        if (propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list))
                return -EINVAL;
 
+       if (IS_MNT_UNION(source_mnt))
+               if (!(u = alloc_union_mount(source_mnt, source_mnt->mnt_root,
+                                       dest_mnt, dest_dentry)))
+                       return -ENOMEM;
+
        if (IS_MNT_SHARED(dest_mnt)) {
                for (p = source_mnt; p; p = next_mnt(p, source_mnt))
                        set_mnt_shared(p);
@@ -937,18 +1057,26 @@ static int attach_recursive_mnt(struct v
        spin_lock(&vfsmount_lock);
        if (parent_nd) {
                detach_mnt(source_mnt, parent_nd);
-               attach_mnt(source_mnt, nd);
+               attach_mnt(source_mnt, nd, u);
                touch_mnt_namespace(current->nsproxy->mnt_ns);
        } else {
                mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
-               commit_tree(source_mnt);
+               commit_tree(source_mnt, u);
        }
 
        list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {
                list_del_init(&child->mnt_hash);
-               commit_tree(child);
+               if (IS_MNT_UNION(child)) {
+                       u = alloc_union_mount(child, child->mnt_root,
+                               child->mnt_parent, child->mnt_mountpoint);
+                       /* FIXME: It is too late to fail from here */
+                       if (!u)
+                               printk(KERN_ERR "attach_recursive_mnt: ENOMEM\n");
+               }
+               commit_tree(child, u);
        }
        spin_unlock(&vfsmount_lock);
+
        return 0;
 }
 
@@ -1556,9 +1684,12 @@ long do_mount(char *dev_name, char *dir_
                mnt_flags |= MNT_RELATIME;
        if (flags & MS_NOMNT)
                mnt_flags |= MNT_NOMNT;
+       if (flags & MS_UNION)
+               mnt_flags |= MNT_UNION;
 
        flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
-                  MS_NOATIME | MS_NODIRATIME | MS_RELATIME | MS_NOMNT);
+                  MS_NOATIME | MS_NODIRATIME | MS_RELATIME | MS_NOMNT |
+                  MS_UNION);
 
        /* ... and get the mountpoint */
        retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
@@ -1888,8 +2019,9 @@ asmlinkage long sys_pivot_root(const cha
                goto out3;
        detach_mnt(new_nd.mnt, &parent_nd);
        detach_mnt(user_nd.mnt, &root_parent);
-       attach_mnt(user_nd.mnt, &old_nd);     /* mount old root on put_old */
-       attach_mnt(new_nd.mnt, &root_parent); /* mount new_root on / */
+       /* TODO: Understand and pass appropriate union_mount argument here. */
+       attach_mnt(user_nd.mnt, &old_nd, NULL);  /* mount old root on put_old */
+       attach_mnt(new_nd.mnt, &root_parent, NULL); /* mount new_root on / */
        touch_mnt_namespace(current->nsproxy->mnt_ns);
        spin_unlock(&vfsmount_lock);
        chroot_fs_refs(&user_nd, &new_nd);
@@ -1940,7 +2072,7 @@ static void __init init_mount_tree(void)
 
 void __init mnt_init(unsigned long mempages)
 {
-       struct list_head *d;
+       struct list_head *d, *e;
        unsigned int nr_hash;
        int i;
        int err;
@@ -1976,12 +2108,24 @@ void __init mnt_init(unsigned long mempa
 
        printk("Mount-cache hash table entries: %d\n", nr_hash);
 
+       /*
+        * Use the same nr_hash for union mount hashtable also.
+        * TODO: This might need a bigger hash table.
+        */
+       union_mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
+
+       if (!union_mount_hashtable)
+               panic("Failed to allocate union mount hash table\n");
+
        /* And initialize the newly allocated array */
        d = mount_hashtable;
+       e = union_mount_hashtable;
        i = nr_hash;
        do {
                INIT_LIST_HEAD(d);
+               INIT_LIST_HEAD(e);
                d++;
+               e++;
                i--;
        } while (i);
        err = sysfs_init();
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -113,6 +113,7 @@ extern int dir_notify_enable;
 #define MS_REMOUNT     32      /* Alter flags of a mounted FS */
 #define MS_MANDLOCK    64      /* Allow mandatory locks on an FS */
 #define MS_DIRSYNC     128     /* Directory modifications are synchronous */
+#define MS_UNION       256     /* Union mount */
 #define MS_NOATIME     1024    /* Do not update access times. */
 #define MS_NODIRATIME  2048    /* Do not update directory access times */
 #define MS_BIND                4096
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -36,6 +36,7 @@ struct mnt_namespace;
 #define MNT_SHARED     0x1000  /* if the vfsmount is a shared mount */
 #define MNT_UNBINDABLE 0x2000  /* if the vfsmount is a unbindable mount */
 #define MNT_PNODE_MASK 0x3000  /* propagation flag mask */
+#define MNT_UNION      0x4000  /* if the vfsmount is a union mount */
 
 struct vfsmount {
        struct list_head mnt_hash;
@@ -53,6 +54,7 @@ struct vfsmount {
        struct list_head mnt_share;     /* circular list of shared mounts */
        struct list_head mnt_slave_list;/* list of slave mounts */
        struct list_head mnt_slave;     /* slave list entry */
+       struct list_head mnt_union;     /* list of union_mounts */
        struct vfsmount *mnt_master;    /* slave is on master->mnt_slave_list */
        struct mnt_namespace *mnt_ns;   /* containing namespace */
        /*
@@ -107,5 +109,20 @@ extern void shrink_submounts(struct vfsm
 extern spinlock_t vfsmount_lock;
 extern dev_t name_to_dev_t(char *name);
 
+#define IS_MNT_UNION(mnt) ((mnt)->mnt_flags & MNT_UNION) /* union mount? */
+
+struct union_mount {   /* links a source (mnt, dentry) to its destination */
+       struct vfsmount *src_mnt, *dst_mnt; /* only dst_mnt is mntget()ed */
+       struct dentry *src_dentry, *dst_dentry; /* both are dget()ed */
+       struct list_head hash, list; /* hash chain; src_mnt->mnt_union entry */
+};
+
+extern void attach_mnt_union(struct union_mount *u);
+extern struct union_mount *alloc_union_mount(struct vfsmount *src_mnt,
+       struct dentry *src_dentry, struct vfsmount *dst_mnt,
+       struct dentry *dst_dentry);
+extern int next_union_mount(struct nameidata *nd);
+extern int next_union_mount_exists(struct vfsmount *mnt, struct dentry *dentry);
+
 #endif
 #endif /* _LINUX_MOUNT_H */
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to