This patch adds shared, private, and slave mount support for VFS trees. Signed-off-by: Ram Pai ([EMAIL PROTECTED])
fs/Makefile | 2 fs/dcache.c | 2 fs/namespace.c | 98 +++++++++++++++++++++++++++++++ fs/pnode.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 5 + include/linux/mount.h | 44 ++++++++++++- include/linux/pnode.h | 80 +++++++++++++++++++++++++ 7 files changed, 385 insertions(+), 4 deletions(-) Index: 2.6.12.work1/fs/namespace.c =================================================================== --- 2.6.12.work1.orig/fs/namespace.c +++ 2.6.12.work1/fs/namespace.c @@ -22,6 +22,7 @@ #include <linux/namei.h> #include <linux/security.h> #include <linux/mount.h> +#include <linux/pnode.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -62,6 +63,7 @@ struct vfsmount *alloc_vfsmnt(const char INIT_LIST_HEAD(&mnt->mnt_mounts); INIT_LIST_HEAD(&mnt->mnt_list); INIT_LIST_HEAD(&mnt->mnt_fslink); + INIT_LIST_HEAD(&mnt->mnt_pnode_mntlist); if (name) { int size = strlen(name)+1; char *newname = kmalloc(size, GFP_KERNEL); @@ -615,6 +617,100 @@ out_unlock: return err; } +static int do_make_shared(struct vfsmount *mnt) +{ + int err=0; + struct vfspnode *old_pnode = NULL; + /* + * if the mount is already a slave mount, + * allocate a new pnode and make it + * a slave pnode of the original pnode.
+ */ + if (IS_MNT_SLAVE(mnt)) { + old_pnode = mnt->mnt_pnode; + pnode_del_slave_mnt(mnt); + } + if(!IS_MNT_SHARED(mnt)) { + mnt->mnt_pnode = pnode_alloc(); + if(!mnt->mnt_pnode) { + pnode_add_slave_mnt(old_pnode, mnt); + err = -ENOMEM; + goto out; + } + pnode_add_member_mnt(mnt->mnt_pnode, mnt); + } + if(old_pnode) + pnode_add_slave_pnode(old_pnode, mnt->mnt_pnode); + set_mnt_shared(mnt); +out: + return err; +} + +static int do_make_slave(struct vfsmount *mnt) +{ + int err=0; + struct vfspnode *old_pnode = NULL; + + if (IS_MNT_SLAVE(mnt)) + goto out; + /* + * only shared mounts can + * be made slave + */ + if (!IS_MNT_SHARED(mnt)) { + err = -EINVAL; + goto out; + } + old_pnode = mnt->mnt_pnode; + pnode_del_member_mnt(mnt); + pnode_add_slave_mnt(old_pnode, mnt); + set_mnt_slave(mnt); + +out: + return err; +} + +static int do_make_private(struct vfsmount *mnt) +{ + if(mnt->mnt_pnode) + pnode_disassociate_mnt(mnt); + set_mnt_private(mnt); + return 0; +} + +/* + * recursively change the type of the mountpoint. NOTE(review): err keeps only the result for the last mount visited, and do_make_shared() reaches the GFP_KERNEL allocation in pnode_alloc() while vfsmount_lock is held; confirm both are intended. + */ +static int do_change_type(struct nameidata *nd, int flag) +{ + struct vfsmount *m, *mnt = nd->mnt; + int err=0; + + if (!(flag & MS_SHARED) && !(flag & MS_PRIVATE) + && !(flag & MS_SLAVE)) + return -EINVAL; + + if (nd->dentry != nd->mnt->mnt_root) + return -EINVAL; + + spin_lock(&vfsmount_lock); + for (m = mnt; m; m = next_mnt(m, mnt)) { + switch (flag) { + case MS_SHARED: + err = do_make_shared(m); + break; + case MS_SLAVE: + err = do_make_slave(m); + break; + case MS_PRIVATE: + err = do_make_private(m); + break; + } + } + spin_unlock(&vfsmount_lock); + return err; +} + /* * do loopback mount.
*/ @@ -1049,6 +1145,8 @@ long do_mount(char * dev_name, char * di data_page); else if (flags & MS_BIND) retval = do_loopback(&nd, dev_name, flags & MS_REC); + else if (flags & MS_SHARED || flags & MS_PRIVATE || flags & MS_SLAVE) + retval = do_change_type(&nd, flags); else if (flags & MS_MOVE) retval = do_move_mount(&nd, dev_name); else Index: 2.6.12.work1/fs/pnode.c =================================================================== --- /dev/null +++ 2.6.12.work1/fs/pnode.c @@ -0,0 +1,158 @@ +/* + * linux/fs/pnode.c + * + * (C) Copyright IBM Corporation 2005. + * Released under GPL v2. + * Author : Ram Pai ([EMAIL PROTECTED]) + * + */ + +#include <linux/config.h> +#include <linux/syscalls.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/smp_lock.h> +#include <linux/init.h> +#include <linux/quotaops.h> +#include <linux/acct.h> +#include <linux/module.h> +#include <linux/seq_file.h> +#include <linux/namespace.h> +#include <linux/namei.h> +#include <linux/security.h> +#include <linux/mount.h> +#include <linux/pnode.h> +#include <asm/uaccess.h> +#include <asm/unistd.h> +#include <stdarg.h> + + +static kmem_cache_t * pnode_cachep; + +/* spinlock for pnode related operations */ + __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfspnode_lock); + + +void __init pnode_init(unsigned long mempages) +{ + pnode_cachep = kmem_cache_create("pnode_cache", + sizeof(struct vfspnode), 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); +} + + +struct vfspnode * pnode_alloc(void) +{ + struct vfspnode *pnode = kmem_cache_alloc(pnode_cachep, GFP_KERNEL); + INIT_LIST_HEAD(&pnode->pnode_vfs); + INIT_LIST_HEAD(&pnode->pnode_slavevfs); + INIT_LIST_HEAD(&pnode->pnode_slavepnode); + INIT_LIST_HEAD(&pnode->pnode_peer_slave); + pnode->pnode_master = NULL; + pnode->pnode_flags = 0; + atomic_set(&pnode->pnode_count,0); + return pnode; +} + +void pnode_free(struct vfspnode *pnode) +{ + kmem_cache_free(pnode_cachep, pnode); +} + +/* + * __put_pnode() should be called with
vfspnode_lock held + */ +void __put_pnode(struct vfspnode *pnode) +{ + struct vfspnode *tmp_pnode; + do { + tmp_pnode = pnode->pnode_master; + list_del_init(&pnode->pnode_peer_slave); + BUG_ON(!list_empty(&pnode->pnode_vfs)); + BUG_ON(!list_empty(&pnode->pnode_slavevfs)); + BUG_ON(!list_empty(&pnode->pnode_slavepnode)); + pnode_free(pnode); + pnode = tmp_pnode; + if (!pnode || !atomic_dec_and_test(&pnode->pnode_count)) + break; + } while(pnode); +} + +static void inline pnode_add_mnt(struct vfspnode *pnode, + struct vfsmount *mnt, int slave) +{ + if (!pnode || !mnt) + return; + spin_lock(&vfspnode_lock); + mnt->mnt_pnode = pnode; + if (slave) { + set_mnt_slave(mnt); + list_add(&mnt->mnt_pnode_mntlist, &pnode->pnode_slavevfs); + } else { + set_mnt_shared(mnt); + list_add(&mnt->mnt_pnode_mntlist, &pnode->pnode_vfs); + } + get_pnode(pnode); + spin_unlock(&vfspnode_lock); +} + +void pnode_add_member_mnt(struct vfspnode *pnode, + struct vfsmount *mnt) +{ + pnode_add_mnt(pnode, mnt, 0); +} + +void pnode_add_slave_mnt(struct vfspnode *pnode, + struct vfsmount *mnt) +{ + pnode_add_mnt(pnode, mnt, 1); +} + + +void pnode_add_slave_pnode(struct vfspnode *pnode, + struct vfspnode *slave_pnode) +{ + if (!pnode || !slave_pnode) + return; + spin_lock(&vfspnode_lock); + slave_pnode->pnode_master = pnode; + slave_pnode->pnode_flags = 0; + list_add(&slave_pnode->pnode_peer_slave, &pnode->pnode_slavepnode); + get_pnode(pnode); + spin_unlock(&vfspnode_lock); +} + +static void _pnode_disassociate_mnt(struct vfsmount *mnt) +{ + spin_lock(&vfspnode_lock); + list_del_init(&mnt->mnt_pnode_mntlist); + put_pnode_locked(mnt->mnt_pnode); + spin_unlock(&vfspnode_lock); + mnt->mnt_pnode = NULL; +} + +void pnode_del_slave_mnt(struct vfsmount *mnt) +{ + if (!mnt) + return; + _pnode_disassociate_mnt(mnt); + CLEAR_MNT_SLAVE(mnt); +} + +void pnode_del_member_mnt(struct vfsmount *mnt) +{ + if (!mnt) + return; + _pnode_disassociate_mnt(mnt); + CLEAR_MNT_SHARED(mnt); +} + + +void
pnode_disassociate_mnt(struct vfsmount *mnt) +{ + if (!mnt) + return; + _pnode_disassociate_mnt(mnt); + CLEAR_MNT_SHARED(mnt); + CLEAR_MNT_SLAVE(mnt); +} Index: 2.6.12.work1/fs/dcache.c =================================================================== --- 2.6.12.work1.orig/fs/dcache.c +++ 2.6.12.work1/fs/dcache.c @@ -27,6 +27,7 @@ #include <linux/module.h> #include <linux/mount.h> #include <linux/file.h> +#include <linux/pnode.h> #include <asm/uaccess.h> #include <linux/security.h> #include <linux/seqlock.h> @@ -1737,6 +1738,7 @@ void __init vfs_caches_init(unsigned lon inode_init(mempages); files_init(mempages); mnt_init(mempages); + pnode_init(mempages); bdev_cache_init(); chrdev_init(); } Index: 2.6.12.work1/include/linux/fs.h =================================================================== --- 2.6.12.work1.orig/include/linux/fs.h +++ 2.6.12.work1/include/linux/fs.h @@ -102,6 +102,9 @@ extern int dir_notify_enable; #define MS_MOVE 8192 #define MS_REC 16384 #define MS_VERBOSE 32768 +#define MS_PRIVATE (1<<18) /* recursively change to private */ +#define MS_SLAVE (1<<19) /* recursively change to slave */ +#define MS_SHARED (1<<20) /* recursively change to shared */ #define MS_POSIXACL (1<<16) /* VFS does not apply the umask */ #define MS_ACTIVE (1<<30) #define MS_NOUSER (1<<31) @@ -232,6 +235,7 @@ extern void update_atime (struct inode * extern void __init inode_init(unsigned long); extern void __init inode_init_early(void); extern void __init mnt_init(unsigned long); +extern void __init pnode_init(unsigned long); extern void __init files_init(unsigned long); struct buffer_head; @@ -1211,6 +1215,7 @@ extern struct vfsmount *kern_mount(struc extern int may_umount_tree(struct vfsmount *); extern int may_umount(struct vfsmount *); extern long do_mount(char *, char *, char *, unsigned long, void *); +extern struct vfsmount *do_make_mounted(struct vfsmount *, struct dentry *); extern int vfs_statfs(struct super_block *, struct kstatfs *); Index:
2.6.12.work1/include/linux/pnode.h =================================================================== --- /dev/null +++ 2.6.12.work1/include/linux/pnode.h @@ -0,0 +1,80 @@ +/* + * linux/include/linux/pnode.h + * + * (C) Copyright IBM Corporation 2005. + * Released under GPL v2. + * + */ +#ifndef _LINUX_PNODE_H +#define _LINUX_PNODE_H + +#include <linux/list.h> +#include <linux/mount.h> +#include <linux/spinlock.h> +#include <asm/atomic.h> + +struct vfspnode { + struct list_head pnode_vfs; /* list of vfsmounts anchored here */ + struct list_head pnode_slavevfs; /* list of slave vfsmounts */ + struct list_head pnode_slavepnode;/* list of slave pnodes */ + struct list_head pnode_peer_slave;/* going through master's slave pnode + list*/ + struct vfspnode *pnode_master; /* master pnode */ + int pnode_flags; + atomic_t pnode_count; +}; +#define PNODE_MAX_SLAVE_LEVEL 10 +#define PNODE_DELETE 0x01 +#define PNODE_SLAVE 0x02 + +#define IS_PNODE_DELETE(pn) ((pn->pnode_flags&PNODE_DELETE)==PNODE_DELETE) +#define IS_PNODE_SLAVE(pn) ((pn->pnode_flags&PNODE_SLAVE)==PNODE_SLAVE) +#define SET_PNODE_DELETE(pn) pn->pnode_flags |= PNODE_DELETE +#define SET_PNODE_SLAVE(pn) pn->pnode_flags |= PNODE_SLAVE + +extern spinlock_t vfspnode_lock; +extern void __put_pnode(struct vfspnode *); + +static inline struct vfspnode * +get_pnode(struct vfspnode *pnode) +{ + if (!pnode) + return NULL; + atomic_inc(&pnode->pnode_count); + return pnode; +} + +static inline void +put_pnode(struct vfspnode *pnode) +{ + if (!pnode) + return; + if (atomic_dec_and_lock(&pnode->pnode_count, &vfspnode_lock)) { + __put_pnode(pnode); + spin_unlock(&vfspnode_lock); + } +} + +/* + * must be called holding the vfspnode_lock + */ +static inline void +put_pnode_locked(struct vfspnode *pnode) +{ + if (!pnode) + return; + if (atomic_dec_and_test(&pnode->pnode_count)) { + __put_pnode(pnode); + } +} + +void __init pnode_init(unsigned long ); +struct vfspnode * pnode_alloc(void); +void pnode_add_slave_mnt(struct vfspnode *, struct
vfsmount *); +void pnode_add_member_mnt(struct vfspnode *, struct vfsmount *); +void pnode_del_slave_mnt(struct vfsmount *); +void pnode_del_member_mnt(struct vfsmount *); +void pnode_disassociate_mnt(struct vfsmount *); +void pnode_add_slave_pnode(struct vfspnode *, struct vfspnode *); +struct vfsmount * pnode_make_mounted(struct vfspnode *, struct vfsmount *, struct dentry *); +#endif /* _LINUX_PNODE_H */ Index: 2.6.12.work1/include/linux/mount.h =================================================================== --- 2.6.12.work1.orig/include/linux/mount.h +++ 2.6.12.work1/include/linux/mount.h @@ -16,9 +16,21 @@ #include <linux/spinlock.h> #include <asm/atomic.h> -#define MNT_NOSUID 1 -#define MNT_NODEV 2 -#define MNT_NOEXEC 4 +#define MNT_NOSUID 0x01 +#define MNT_NODEV 0x02 +#define MNT_NOEXEC 0x04 +#define MNT_PRIVATE 0x10 /* if the vfsmount is private, by default it is private*/ +#define MNT_SLAVE 0x20 /* if the vfsmount is a slave mount of its pnode */ +#define MNT_SHARED 0x40 /* if the vfsmount is a shared member of its pnode */ +#define MNT_PNODE_MASK 0xf0 /* propagation flag mask */ + +#define IS_MNT_SHARED(mnt) (mnt->mnt_flags & MNT_SHARED) +#define IS_MNT_SLAVE(mnt) (mnt->mnt_flags & MNT_SLAVE) +#define IS_MNT_PRIVATE(mnt) (mnt->mnt_flags & MNT_PRIVATE) + +#define CLEAR_MNT_SHARED(mnt) (mnt->mnt_flags &= ~(MNT_PNODE_MASK & MNT_SHARED)) +#define CLEAR_MNT_PRIVATE(mnt) (mnt->mnt_flags &= ~(MNT_PNODE_MASK & MNT_PRIVATE)) +#define CLEAR_MNT_SLAVE(mnt) (mnt->mnt_flags &= ~(MNT_PNODE_MASK & MNT_SLAVE)) struct vfsmount { @@ -29,6 +41,10 @@ struct vfsmount struct super_block *mnt_sb; /* pointer to superblock */ struct list_head mnt_mounts; /* list of children, anchored here */ struct list_head mnt_child; /* and going through their mnt_child */ + struct list_head mnt_pnode_mntlist;/* and going through their + pnode's vfsmount */ + struct vfspnode *mnt_pnode; /* pnode this vfsmount is + attached to, NULL if none */ atomic_t mnt_count; int mnt_flags; int mnt_expiry_mark; /*
true if marked for expiry */ @@ -38,6 +54,28 @@ struct vfsmount struct namespace *mnt_namespace; /* containing namespace */ }; +static inline void set_mnt_shared(struct vfsmount *mnt) +{ + mnt->mnt_flags |= MNT_PNODE_MASK & MNT_SHARED; + CLEAR_MNT_PRIVATE(mnt); + CLEAR_MNT_SLAVE(mnt); +} + +static inline void set_mnt_private(struct vfsmount *mnt) +{ + mnt->mnt_flags |= MNT_PNODE_MASK & MNT_PRIVATE; + CLEAR_MNT_SLAVE(mnt); + CLEAR_MNT_SHARED(mnt); + mnt->mnt_pnode = NULL; +} + +static inline void set_mnt_slave(struct vfsmount *mnt) +{ + mnt->mnt_flags |= MNT_PNODE_MASK & MNT_SLAVE; + CLEAR_MNT_PRIVATE(mnt); + CLEAR_MNT_SHARED(mnt); +} + static inline struct vfsmount *mntget(struct vfsmount *mnt) { if (mnt) Index: 2.6.12.work1/fs/Makefile =================================================================== --- 2.6.12.work1.orig/fs/Makefile +++ 2.6.12.work1/fs/Makefile @@ -8,7 +8,7 @@ obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \ block_dev.o char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ - attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ + attr.o bad_inode.o file.o filesystems.o namespace.o pnode.o aio.o \ seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \ obj-$(CONFIG_EPOLL) += eventpoll.o - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/