Author: mckusick
Date: Sun Nov 29 21:42:26 2015
New Revision: 291460
URL: https://svnweb.freebsd.org/changeset/base/291460

Log:
  As the kernel allocates and frees vnodes, it fully initializes them
  on every allocation and fully releases them on every free.  These
  are not trivial costs: it starts by zeroing a large structure then
  initializes a mutex, a lock manager lock, an rw lock, four lists,
  and six pointers. And looking at vfs.vnodes_created, these operations
  are being done millions of times an hour on a busy machine.
  
  As a performance optimization, this code update uses the uma_init
  and uma_fini routines to do these initializations and cleanups only
  as the vnodes enter and leave the vnode_zone. With this change the
  initializations are only done kern.maxvnodes times at system startup
  and then only rarely again. The frees are done only if the vnode_zone
  shrinks which never happens in practice. For those curious about the
  avoided work, look at the vnode_init() and vnode_fini() functions in
  kern/vfs_subr.c to see the code that has been removed from the main
  vnode allocation/free path.
  
  Reviewed by: kib
  Tested by:   Peter Holm

Modified:
  head/sys/kern/vfs_subr.c

Modified: head/sys/kern/vfs_subr.c
==============================================================================
--- head/sys/kern/vfs_subr.c    Sun Nov 29 21:01:02 2015        (r291459)
+++ head/sys/kern/vfs_subr.c    Sun Nov 29 21:42:26 2015        (r291460)
@@ -346,6 +346,66 @@ PCTRIE_DEFINE(BUF, buf, b_lblkno, buf_tr
 #ifndef        MAXVNODES_MAX
 #define        MAXVNODES_MAX   (512 * 1024 * 1024 / 64)        /* 8M */
 #endif
+
+/*
+ * Initialize a vnode as it first enters the zone.
+ */
+static int
+vnode_init(void *mem, int size, int flags)
+{
+       struct vnode *vp;
+       struct bufobj *bo;
+
+       vp = mem;
+       bzero(vp, size);
+       /*
+        * Setup locks.
+        */
+       vp->v_vnlock = &vp->v_lock;
+       mtx_init(&vp->v_interlock, "vnode interlock", NULL, MTX_DEF);
+       /*
+        * By default, don't allow shared locks unless filesystems opt-in.
+        */
+       lockinit(vp->v_vnlock, PVFS, "vnode", VLKTIMEOUT,
+           LK_NOSHARE | LK_IS_VNODE);
+       /*
+        * Initialize bufobj.
+        */
+       bo = &vp->v_bufobj;
+       bo->__bo_vnode = vp;
+       rw_init(BO_LOCKPTR(bo), "bufobj interlock");
+       bo->bo_private = vp;
+       TAILQ_INIT(&bo->bo_clean.bv_hd);
+       TAILQ_INIT(&bo->bo_dirty.bv_hd);
+       /*
+        * Initialize namecache.
+        */
+       LIST_INIT(&vp->v_cache_src);
+       TAILQ_INIT(&vp->v_cache_dst);
+       /*
+        * Initialize rangelocks.
+        */
+       rangelock_init(&vp->v_rl);
+       return (0);
+}
+
+/*
+ * Free a vnode when it is cleared from the zone.
+ */
+static void
+vnode_fini(void *mem, int size)
+{
+       struct vnode *vp;
+       struct bufobj *bo;
+
+       vp = mem;
+       rangelock_destroy(&vp->v_rl);
+       lockdestroy(vp->v_vnlock);
+       mtx_destroy(&vp->v_interlock);
+       bo = &vp->v_bufobj;
+       rw_destroy(BO_LOCKPTR(bo));
+}
+
 static void
 vntblinit(void *dummy __unused)
 {
@@ -379,7 +439,7 @@ vntblinit(void *dummy __unused)
        TAILQ_INIT(&vnode_free_list);
        mtx_init(&vnode_free_list_mtx, "vnode_free_list", NULL, MTX_DEF);
        vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL,
-           NULL, NULL, UMA_ALIGN_PTR, 0);
+           vnode_init, vnode_fini, UMA_ALIGN_PTR, 0);
        vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo),
            NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
        /*
@@ -1223,8 +1283,8 @@ getnewvnode(const char *tag, struct moun
     struct vnode **vpp)
 {
        struct vnode *vp;
-       struct bufobj *bo;
        struct thread *td;
+       struct lock_object *lo;
        static int cyclecount;
        int error;
 
@@ -1271,40 +1331,42 @@ getnewvnode(const char *tag, struct moun
        mtx_unlock(&vnode_free_list_mtx);
 alloc:
        atomic_add_long(&vnodes_created, 1);
-       vp = (struct vnode *) uma_zalloc(vnode_zone, M_WAITOK|M_ZERO);
+       vp = (struct vnode *) uma_zalloc(vnode_zone, M_WAITOK);
        /*
-        * Setup locks.
-        */
-       vp->v_vnlock = &vp->v_lock;
-       mtx_init(&vp->v_interlock, "vnode interlock", NULL, MTX_DEF);
-       /*
-        * By default, don't allow shared locks unless filesystems
-        * opt-in.
-        */
-       lockinit(vp->v_vnlock, PVFS, tag, VLKTIMEOUT, LK_NOSHARE | LK_IS_VNODE);
-       /*
-        * Initialize bufobj.
+        * Locks are given the generic name "vnode" when created.
+        * Follow the historic practice of using the filesystem
+        * name when they are allocated, e.g., "zfs", "ufs", "nfs", etc.
+        *
+        * Locks live in a witness group keyed on their name. Thus,
+        * when a lock is renamed, it must also move from the witness
+        * group of its old name to the witness group of its new name.
+        *
+        * The change only needs to be made when the vnode moves
+        * from one filesystem type to another. We ensure that each
+        * filesystem uses a single static name pointer for its tag so
+        * that we can compare pointers rather than doing a strcmp().
         */
-       bo = &vp->v_bufobj;
-       bo->__bo_vnode = vp;
-       rw_init(BO_LOCKPTR(bo), "bufobj interlock");
-       bo->bo_ops = &buf_ops_bio;
-       bo->bo_private = vp;
-       TAILQ_INIT(&bo->bo_clean.bv_hd);
-       TAILQ_INIT(&bo->bo_dirty.bv_hd);
+       lo = &vp->v_vnlock->lock_object;
+       if (lo->lo_name != tag) {
+               lo->lo_name = tag;
+               WITNESS_DESTROY(lo);
+               WITNESS_INIT(lo, tag);
+       }
        /*
-        * Initialize namecache.
+        * By default, don't allow shared locks unless filesystems opt-in.
         */
-       LIST_INIT(&vp->v_cache_src);
-       TAILQ_INIT(&vp->v_cache_dst);
+       vp->v_vnlock->lock_object.lo_flags |= LK_NOSHARE;
        /*
         * Finalize various vnode identity bits.
         */
+       KASSERT(vp->v_object == NULL, ("stale v_object %p", vp));
+       KASSERT(vp->v_lockf == NULL, ("stale v_lockf %p", vp));
+       KASSERT(vp->v_pollinfo == NULL, ("stale v_pollinfo %p", vp));
        vp->v_type = VNON;
        vp->v_tag = tag;
        vp->v_op = vops;
        v_init_counters(vp);
-       vp->v_data = NULL;
+       vp->v_bufobj.bo_ops = &buf_ops_bio;
 #ifdef MAC
        mac_vnode_init(vp);
        if (mp != NULL && (mp->mnt_flag & MNT_MULTILABEL) == 0)
@@ -1313,11 +1375,10 @@ alloc:
                printf("NULL mp in getnewvnode()\n");
 #endif
        if (mp != NULL) {
-               bo->bo_bsize = mp->mnt_stat.f_iosize;
+               vp->v_bufobj.bo_bsize = mp->mnt_stat.f_iosize;
                if ((mp->mnt_kern_flag & MNTK_NOKNOTE) != 0)
                        vp->v_vflag |= VV_NOKNOTE;
        }
-       rangelock_init(&vp->v_rl);
 
        /*
         * For the filesystems which do not use vfs_hash_insert(),
@@ -2683,6 +2744,12 @@ _vdrop(struct vnode *vp, bool locked)
        }
        /*
         * The vnode has been marked for destruction, so free it.
+        *
+        * The vnode will be returned to the zone where it will
+        * normally remain until it is needed for another vnode. We
+        * need to cleanup (or verify that the cleanup has already
+        * been done) any residual data left from its current use
+        * so as not to contaminate the freshly allocated vnode.
         */
        CTR2(KTR_VFS, "%s: destroying the vnode %p", __func__, vp);
        atomic_subtract_long(&numvnodes, 1);
@@ -2707,16 +2774,17 @@ _vdrop(struct vnode *vp, bool locked)
 #ifdef MAC
        mac_vnode_destroy(vp);
 #endif
-       if (vp->v_pollinfo != NULL)
+       if (vp->v_pollinfo != NULL) {
                destroy_vpollinfo(vp->v_pollinfo);
+               vp->v_pollinfo = NULL;
+       }
 #ifdef INVARIANTS
        /* XXX Elsewhere we detect an already freed vnode via NULL v_op. */
        vp->v_op = NULL;
 #endif
-       rangelock_destroy(&vp->v_rl);
-       lockdestroy(vp->v_vnlock);
-       mtx_destroy(&vp->v_interlock);
-       rw_destroy(BO_LOCKPTR(bo));
+       vp->v_iflag = 0;
+       vp->v_vflag = 0;
+       bo->bo_flag = 0;
        uma_zfree(vnode_zone, vp);
 }
 
@@ -3081,6 +3149,7 @@ vgonel(struct vnode *vp)
         * Clear the advisory locks and wake up waiting threads.
         */
        (void)VOP_ADVLOCKPURGE(vp);
+       vp->v_lockf = NULL;
        /*
         * Delete from old mount point vnode list.
         */
_______________________________________________
[email protected] mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "[email protected]"

Reply via email to