Author: mjg
Date: Fri Sep 30 17:27:17 2016
New Revision: 306512
URL: https://svnweb.freebsd.org/changeset/base/306512

Log:
  vfs: batch free vnodes in per-mnt lists
  
  Previously, free vnodes were always returned directly to the global
  LRU list. With this change, up to mnt_free_list_batch vnodes are first
  collected on a per-mount list.
  
  Syncer runs always return the batch, regardless of its size.
  
  While vnodes on per-mount lists are not counted as free, they can still
  be returned to the global list in case of a vnode shortage.
  
  Reviewed by:  kib
  Tested by:    pho
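
The shape of the change, reduced to its essentials: instead of taking the
global free-list lock on every vdrop, a freed vnode is parked on a per-mount
list, and the whole batch is spliced onto the global LRU in one operation once
it fills (or when the syncer flushes it). Below is a minimal userspace sketch
of the scheme, built on the same <sys/queue.h> macros the kernel uses; the
names (node, batch_free, flush_batch, BATCH_LIMIT) are illustrative stand-ins,
not the kernel's own.

#include <sys/queue.h>
#include <stdio.h>

#define	BATCH_LIMIT	4	/* stands in for the mnt_free_list_batch tunable */

struct node {
	int id;
	TAILQ_ENTRY(node) entries;
};
TAILQ_HEAD(nodelist, node);

static struct nodelist global_free = TAILQ_HEAD_INITIALIZER(global_free);
static struct nodelist tmp_free = TAILQ_HEAD_INITIALIZER(tmp_free);
static int tmp_free_size;

/*
 * Analogue of vnlru_return_batch_locked(): TAILQ_CONCAT() splices the
 * whole batch onto the global list in constant time, so the kernel takes
 * the global mutex once per batch instead of once per vnode.
 */
static void
flush_batch(void)
{

	if (tmp_free_size == 0)
		return;
	TAILQ_CONCAT(&global_free, &tmp_free, entries);
	tmp_free_size = 0;
}

/* Analogue of the _vdrop() path: queue locally, flush when the batch fills. */
static void
batch_free(struct node *np)
{

	TAILQ_INSERT_TAIL(&tmp_free, np, entries);
	if (++tmp_free_size >= BATCH_LIMIT)
		flush_batch();
}

int
main(void)
{
	struct node nodes[10], *np;
	int i;

	for (i = 0; i < 10; i++) {
		nodes[i].id = i;
		batch_free(&nodes[i]);
	}
	flush_batch();	/* the syncer's unconditional return of the batch */
	TAILQ_FOREACH(np, &global_free, entries)
		printf("freed %d\n", np->id);
	return (0);
}

The batch size is tunable at run time through the new read-write sysctl added
in vfs_subr.c, e.g. "sysctl vfs.mnt_free_list_batch=256".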

Modified:
  head/sys/kern/vfs_mount.c
  head/sys/kern/vfs_subr.c
  head/sys/sys/mount.h
  head/sys/sys/vnode.h

Modified: head/sys/kern/vfs_mount.c
==============================================================================
--- head/sys/kern/vfs_mount.c   Fri Sep 30 17:19:43 2016        (r306511)
+++ head/sys/kern/vfs_mount.c   Fri Sep 30 17:27:17 2016        (r306512)
@@ -109,6 +109,7 @@ mount_init(void *mem, int size, int flag
 
        mp = (struct mount *)mem;
        mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF);
+       mtx_init(&mp->mnt_listmtx, "struct mount vlist mtx", NULL, MTX_DEF);
        lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0);
        return (0);
 }
@@ -120,6 +121,7 @@ mount_fini(void *mem, int size)
 
        mp = (struct mount *)mem;
        lockdestroy(&mp->mnt_explock);
+       mtx_destroy(&mp->mnt_listmtx);
        mtx_destroy(&mp->mnt_mtx);
 }
 
@@ -461,6 +463,8 @@ vfs_mount_alloc(struct vnode *vp, struct
        mp->mnt_nvnodelistsize = 0;
        TAILQ_INIT(&mp->mnt_activevnodelist);
        mp->mnt_activevnodelistsize = 0;
+       TAILQ_INIT(&mp->mnt_tmpfreevnodelist);
+       mp->mnt_tmpfreevnodelistsize = 0;
        mp->mnt_ref = 0;
        (void) vfs_busy(mp, MBF_NOWAIT);
        atomic_add_acq_int(&vfsp->vfc_refcount, 1);

Modified: head/sys/kern/vfs_subr.c
==============================================================================
--- head/sys/kern/vfs_subr.c    Fri Sep 30 17:19:43 2016        (r306511)
+++ head/sys/kern/vfs_subr.c    Fri Sep 30 17:27:17 2016        (r306512)
@@ -112,6 +112,7 @@ static void vfs_knllock(void *arg);
 static void    vfs_knlunlock(void *arg);
 static void    vfs_knl_assert_locked(void *arg);
 static void    vfs_knl_assert_unlocked(void *arg);
+static void    vnlru_return_batches(struct vfsops *mnt_op);
 static void    destroy_vpollinfo(struct vpollinfo *vi);
 
 /*
@@ -127,6 +128,10 @@ static u_long vnodes_created;
 SYSCTL_ULONG(_vfs, OID_AUTO, vnodes_created, CTLFLAG_RD, &vnodes_created,
     0, "Number of vnodes created by getnewvnode");
 
+static u_long mnt_free_list_batch = 128;
+SYSCTL_ULONG(_vfs, OID_AUTO, mnt_free_list_batch, CTLFLAG_RW,
+    &mnt_free_list_batch, 0, "Limit of vnodes held on mnt's free list");
+
 /*
  * Conversion tables for conversion from vnode types to inode formats
  * and back.
@@ -953,7 +958,9 @@ vnlru_free_locked(int count, struct vfso
 {
        struct vnode *vp;
        struct mount *mp;
+       bool tried_batches;
 
+       tried_batches = false;
        mtx_assert(&vnode_free_list_mtx, MA_OWNED);
        if (count > max_vnlru_free)
                count = max_vnlru_free;
@@ -963,8 +970,16 @@ vnlru_free_locked(int count, struct vfso
                 * The list can be modified while the free_list_mtx
                 * has been dropped and vp could be NULL here.
                 */
-               if (!vp)
-                       break;
+               if (vp == NULL) {
+                       if (tried_batches)
+                               break;
+                       mtx_unlock(&vnode_free_list_mtx);
+                       vnlru_return_batches(mnt_op);
+                       tried_batches = true;
+                       mtx_lock(&vnode_free_list_mtx);
+                       continue;
+               }
+
                VNASSERT(vp->v_op != NULL, vp,
                    ("vnlru_free: vnode already reclaimed."));
                KASSERT((vp->v_iflag & VI_FREE) != 0,
@@ -1041,6 +1056,63 @@ vspace(void)
        return (space);
 }
 
+static void
+vnlru_return_batch_locked(struct mount *mp)
+{
+       struct vnode *vp;
+
+       mtx_assert(&mp->mnt_listmtx, MA_OWNED);
+
+       if (mp->mnt_tmpfreevnodelistsize == 0)
+               return;
+
+       mtx_lock(&vnode_free_list_mtx);
+       TAILQ_FOREACH(vp, &mp->mnt_tmpfreevnodelist, v_actfreelist) {
+               VNASSERT((vp->v_mflag & VMP_TMPMNTFREELIST) != 0, vp,
+                   ("vnode without VMP_TMPMNTFREELIST on mnt_tmpfreevnodelist"));
+               vp->v_mflag &= ~VMP_TMPMNTFREELIST;
+       }
+       TAILQ_CONCAT(&vnode_free_list, &mp->mnt_tmpfreevnodelist, v_actfreelist);
+       freevnodes += mp->mnt_tmpfreevnodelistsize;
+       mp->mnt_tmpfreevnodelistsize = 0;
+       mtx_unlock(&vnode_free_list_mtx);
+}
+
+static void
+vnlru_return_batch(struct mount *mp)
+{
+
+       mtx_lock(&mp->mnt_listmtx);
+       vnlru_return_batch_locked(mp);
+       mtx_unlock(&mp->mnt_listmtx);
+}
+
+static void
+vnlru_return_batches(struct vfsops *mnt_op)
+{
+       struct mount *mp, *nmp;
+       bool need_unbusy;
+
+       mtx_lock(&mountlist_mtx);
+       for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
+               need_unbusy = false;
+               if (mnt_op != NULL && mp->mnt_op != mnt_op)
+                       goto next;
+               if (mp->mnt_tmpfreevnodelistsize == 0)
+                       goto next;
+               if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) == 0) {
+                       vnlru_return_batch(mp);
+                       need_unbusy = true;
+                       mtx_lock(&mountlist_mtx);
+               }
+next:
+               nmp = TAILQ_NEXT(mp, mnt_list);
+               if (need_unbusy)
+                       vfs_unbusy(mp);
+       }
+       mtx_unlock(&mountlist_mtx);
+}
+
 /*
  * Attempt to recycle vnodes in a context that is always safe to block.
  * Calling vlrurecycle() from the bowels of filesystem code has some
@@ -1068,9 +1140,8 @@ vnlru_proc(void)
                 * adjusted using its sysctl, or emergency growth), first
                 * try to reduce it by discarding from the free list.
                 */
-               if (numvnodes > desiredvnodes && freevnodes > 0)
-                       vnlru_free_locked(ulmin(numvnodes - desiredvnodes,
-                           freevnodes), NULL);
+               if (numvnodes > desiredvnodes)
+                       vnlru_free_locked(numvnodes - desiredvnodes, NULL);
                /*
                 * Sleep if the vnode cache is in a good state.  This is
                 * when it is not over-full and has space for about a 4%
@@ -1457,10 +1528,10 @@ delmntque(struct vnode *vp)
        active = vp->v_iflag & VI_ACTIVE;
        vp->v_iflag &= ~VI_ACTIVE;
        if (active) {
-               mtx_lock(&vnode_free_list_mtx);
+               mtx_lock(&mp->mnt_listmtx);
                TAILQ_REMOVE(&mp->mnt_activevnodelist, vp, v_actfreelist);
                mp->mnt_activevnodelistsize--;
-               mtx_unlock(&vnode_free_list_mtx);
+               mtx_unlock(&mp->mnt_listmtx);
        }
        vp->v_mount = NULL;
        VI_UNLOCK(vp);
@@ -1525,10 +1596,10 @@ insmntque1(struct vnode *vp, struct moun
        KASSERT((vp->v_iflag & VI_ACTIVE) == 0,
            ("Activating already active vnode"));
        vp->v_iflag |= VI_ACTIVE;
-       mtx_lock(&vnode_free_list_mtx);
+       mtx_lock(&mp->mnt_listmtx);
        TAILQ_INSERT_HEAD(&mp->mnt_activevnodelist, vp, v_actfreelist);
        mp->mnt_activevnodelistsize++;
-       mtx_unlock(&vnode_free_list_mtx);
+       mtx_unlock(&mp->mnt_listmtx);
        VI_UNLOCK(vp);
        MNT_IUNLOCK(mp);
        return (0);
@@ -2753,17 +2824,25 @@ _vhold(struct vnode *vp, bool locked)
         * Remove a vnode from the free list, mark it as in use,
         * and put it on the active list.
         */
-       mtx_lock(&vnode_free_list_mtx);
-       TAILQ_REMOVE(&vnode_free_list, vp, v_actfreelist);
-       freevnodes--;
-       vp->v_iflag &= ~VI_FREE;
+       mp = vp->v_mount;
+       mtx_lock(&mp->mnt_listmtx);
+       if ((vp->v_mflag & VMP_TMPMNTFREELIST) != 0) {
+               TAILQ_REMOVE(&mp->mnt_tmpfreevnodelist, vp, v_actfreelist);
+               mp->mnt_tmpfreevnodelistsize--;
+               vp->v_mflag &= ~VMP_TMPMNTFREELIST;
+       } else {
+               mtx_lock(&vnode_free_list_mtx);
+               TAILQ_REMOVE(&vnode_free_list, vp, v_actfreelist);
+               freevnodes--;
+               mtx_unlock(&vnode_free_list_mtx);
+       }
        KASSERT((vp->v_iflag & VI_ACTIVE) == 0,
            ("Activating already active vnode"));
+       vp->v_iflag &= ~VI_FREE;
        vp->v_iflag |= VI_ACTIVE;
-       mp = vp->v_mount;
        TAILQ_INSERT_HEAD(&mp->mnt_activevnodelist, vp, v_actfreelist);
        mp->mnt_activevnodelistsize++;
-       mtx_unlock(&vnode_free_list_mtx);
+       mtx_unlock(&mp->mnt_listmtx);
        refcount_acquire(&vp->v_holdcnt);
        if (!locked)
                VI_UNLOCK(vp);
@@ -2819,21 +2898,25 @@ _vdrop(struct vnode *vp, bool locked)
                if ((vp->v_iflag & VI_OWEINACT) == 0) {
                        vp->v_iflag &= ~VI_ACTIVE;
                        mp = vp->v_mount;
-                       mtx_lock(&vnode_free_list_mtx);
+                       mtx_lock(&mp->mnt_listmtx);
                        if (active) {
                                TAILQ_REMOVE(&mp->mnt_activevnodelist, vp,
                                    v_actfreelist);
                                mp->mnt_activevnodelistsize--;
                        }
-                       TAILQ_INSERT_TAIL(&vnode_free_list, vp,
+                       TAILQ_INSERT_TAIL(&mp->mnt_tmpfreevnodelist, vp,
                            v_actfreelist);
-                       freevnodes++;
+                       mp->mnt_tmpfreevnodelistsize++;
                        vp->v_iflag |= VI_FREE;
-                       mtx_unlock(&vnode_free_list_mtx);
+                       vp->v_mflag |= VMP_TMPMNTFREELIST;
+                       VI_UNLOCK(vp);
+                       if (mp->mnt_tmpfreevnodelistsize >= mnt_free_list_batch)
+                               vnlru_return_batch_locked(mp);
+                       mtx_unlock(&mp->mnt_listmtx);
                } else {
+                       VI_UNLOCK(vp);
                        atomic_add_long(&free_owe_inact, 1);
                }
-               VI_UNLOCK(vp);
                return;
        }
        /*
@@ -3926,6 +4009,9 @@ vfs_msync(struct mount *mp, int flags)
        struct vm_object *obj;
 
        CTR2(KTR_VFS, "%s: mp %p", __func__, mp);
+
+       vnlru_return_batch(mp);
+
        MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) {
                obj = vp->v_object;
                if (obj != NULL && (obj->flags & OBJ_MIGHTBEDIRTY) != 0 &&
@@ -5236,7 +5322,7 @@ mnt_vnode_next_active(struct vnode **mvp
 {
        struct vnode *vp, *nvp;
 
-       mtx_assert(&vnode_free_list_mtx, MA_OWNED);
+       mtx_assert(&mp->mnt_listmtx, MA_OWNED);
        KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
 restart:
        vp = TAILQ_NEXT(*mvp, v_actfreelist);
@@ -5249,9 +5335,9 @@ restart:
                if (!VI_TRYLOCK(vp)) {
                        if (mp_ncpus == 1 || should_yield()) {
                                TAILQ_INSERT_BEFORE(vp, *mvp, v_actfreelist);
-                               mtx_unlock(&vnode_free_list_mtx);
+                               mtx_unlock(&mp->mnt_listmtx);
                                pause("vnacti", 1);
-                               mtx_lock(&vnode_free_list_mtx);
+                               mtx_lock(&mp->mnt_listmtx);
                                goto restart;
                        }
                        continue;
@@ -5268,12 +5354,12 @@ restart:
 
        /* Check if we are done */
        if (vp == NULL) {
-               mtx_unlock(&vnode_free_list_mtx);
+               mtx_unlock(&mp->mnt_listmtx);
                mnt_vnode_markerfree_active(mvp, mp);
                return (NULL);
        }
        TAILQ_INSERT_AFTER(&mp->mnt_activevnodelist, vp, *mvp, v_actfreelist);
-       mtx_unlock(&vnode_free_list_mtx);
+       mtx_unlock(&mp->mnt_listmtx);
        ASSERT_VI_LOCKED(vp, "active iter");
        KASSERT((vp->v_iflag & VI_ACTIVE) != 0, ("Non-active vp %p", vp));
        return (vp);
@@ -5285,7 +5371,7 @@ __mnt_vnode_next_active(struct vnode **m
 
        if (should_yield())
                kern_yield(PRI_USER);
-       mtx_lock(&vnode_free_list_mtx);
+       mtx_lock(&mp->mnt_listmtx);
        return (mnt_vnode_next_active(mvp, mp));
 }
 
@@ -5301,10 +5387,10 @@ __mnt_vnode_first_active(struct vnode **
        (*mvp)->v_type = VMARKER;
        (*mvp)->v_mount = mp;
 
-       mtx_lock(&vnode_free_list_mtx);
+       mtx_lock(&mp->mnt_listmtx);
        vp = TAILQ_FIRST(&mp->mnt_activevnodelist);
        if (vp == NULL) {
-               mtx_unlock(&vnode_free_list_mtx);
+               mtx_unlock(&mp->mnt_listmtx);
                mnt_vnode_markerfree_active(mvp, mp);
                return (NULL);
        }
@@ -5319,8 +5405,8 @@ __mnt_vnode_markerfree_active(struct vno
        if (*mvp == NULL)
                return;
 
-       mtx_lock(&vnode_free_list_mtx);
+       mtx_lock(&mp->mnt_listmtx);
        TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist);
-       mtx_unlock(&vnode_free_list_mtx);
+       mtx_unlock(&mp->mnt_listmtx);
        mnt_vnode_markerfree_active(mvp, mp);
 }
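
The iterator hunks above also move the active-vnode walk from the global
vnode_free_list_mtx to the per-mount mnt_listmtx, so iterating one mount's
active list no longer contends with free-list traffic on every other mount.
They rely on the marker technique: a placeholder entry stays on the list to
record the iterator's position, which lets the lock be dropped (e.g. around
the pause() above) and retaken without losing the place. A hypothetical
userspace sketch of the same idea follows; none of these names are the
kernel's API.

#include <sys/queue.h>
#include <pthread.h>
#include <stdio.h>

struct item {
	int id;
	int is_marker;			/* stands in for v_type == VMARKER */
	TAILQ_ENTRY(item) entries;
};
TAILQ_HEAD(itemlist, item);

static struct itemlist list = TAILQ_HEAD_INITIALIZER(list);
static pthread_mutex_t list_mtx = PTHREAD_MUTEX_INITIALIZER;

/*
 * Analogue of mnt_vnode_next_active(): pull the marker out, skip other
 * iterators' markers, and re-insert our marker after the item we return.
 * Between calls the lock is not held; the marker keeps our place.
 */
static struct item *
next_with_marker(struct item *marker)
{
	struct item *ip;

	pthread_mutex_lock(&list_mtx);
	ip = TAILQ_NEXT(marker, entries);
	TAILQ_REMOVE(&list, marker, entries);
	while (ip != NULL && ip->is_marker)
		ip = TAILQ_NEXT(ip, entries);
	if (ip != NULL)
		TAILQ_INSERT_AFTER(&list, ip, marker, entries);
	pthread_mutex_unlock(&list_mtx);
	return (ip);
}

int
main(void)
{
	struct item items[5], marker, *ip;
	int i;

	marker.is_marker = 1;
	TAILQ_INSERT_HEAD(&list, &marker, entries);
	for (i = 0; i < 5; i++) {
		items[i].id = i;
		items[i].is_marker = 0;
		TAILQ_INSERT_TAIL(&list, &items[i], entries);
	}
	while ((ip = next_with_marker(&marker)) != NULL)
		printf("visited %d\n", ip->id);
	return (0);
}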

Modified: head/sys/sys/mount.h
==============================================================================
--- head/sys/sys/mount.h        Fri Sep 30 17:19:43 2016        (r306511)
+++ head/sys/sys/mount.h        Fri Sep 30 17:27:17 2016        (r306512)
@@ -147,6 +147,7 @@ struct vfsopt {
  * put on a doubly linked list.
  *
  * Lock reference:
+ *     l - mnt_listmtx
  *     m - mountlist_mtx
  *     i - interlock
  *     v - vnode freelist mutex
@@ -166,8 +167,6 @@ struct mount {
        int             mnt_ref;                /* (i) Reference count */
        struct vnodelst mnt_nvnodelist;         /* (i) list of vnodes */
        int             mnt_nvnodelistsize;     /* (i) # of vnodes */
-       struct vnodelst mnt_activevnodelist;    /* (v) list of active vnodes */
-       int             mnt_activevnodelistsize;/* (v) # of active vnodes */
        int             mnt_writeopcount;       /* (i) write syscalls pending */
        int             mnt_kern_flag;          /* (i) kernel only flags */
        uint64_t        mnt_flag;               /* (i) flags shared with user */
@@ -188,6 +187,11 @@ struct mount {
        struct thread   *mnt_susp_owner;        /* (i) thread owning suspension */
 #define        mnt_endzero     mnt_gjprovider
        char            *mnt_gjprovider;        /* gjournal provider name */
+       struct mtx      mnt_listmtx;
+       struct vnodelst mnt_activevnodelist;    /* (l) list of active vnodes */
+       int             mnt_activevnodelistsize;/* (l) # of active vnodes */
+       struct vnodelst mnt_tmpfreevnodelist;   /* (l) list of free vnodes */
+       int             mnt_tmpfreevnodelistsize;/* (l) # of free vnodes */
        struct lock     mnt_explock;            /* vfs_export walkers lock */
        TAILQ_ENTRY(mount) mnt_upper_link;      /* (m) we in the all uppers */
        TAILQ_HEAD(, mount) mnt_uppers;         /* (m) upper mounts over us*/

Modified: head/sys/sys/vnode.h
==============================================================================
--- head/sys/sys/vnode.h        Fri Sep 30 17:19:43 2016        (r306511)
+++ head/sys/sys/vnode.h        Fri Sep 30 17:27:17 2016        (r306512)
@@ -75,8 +75,8 @@ struct vpollinfo {
  *
  * Lock reference:
  *     c - namecache mutex
- *     f - freelist mutex
  *     i - interlock
+ *     l - mp mnt_listmtx or freelist mutex
  *     I - updated with atomics, 0->1 and 1->0 transitions with interlock held
  *     m - mount point interlock
  *     p - pollinfo lock
@@ -144,7 +144,7 @@ struct vnode {
        /*
         * The machinery of being a vnode
         */
-       TAILQ_ENTRY(vnode) v_actfreelist;       /* f vnode active/free lists */
+       TAILQ_ENTRY(vnode) v_actfreelist;       /* l vnode active/free lists */
        struct bufobj   v_bufobj;               /* * Buffer cache object */
 
        /*
@@ -167,6 +167,7 @@ struct vnode {
        u_int   v_usecount;                     /* I ref count of users */
        u_int   v_iflag;                        /* i vnode flags (see below) */
        u_int   v_vflag;                        /* v vnode flags */
+       u_int   v_mflag;                        /* l mnt-specific vnode flags */
        int     v_writecount;                   /* v ref count of writers */
        u_int   v_hash;
        enum    vtype v_type;                   /* u vnode type */
@@ -256,6 +257,8 @@ struct xvnode {
 #define        VV_MD           0x0800  /* vnode backs the md device */
 #define        VV_FORCEINSMQ   0x1000  /* force the insmntque to succeed */
 
+#define        VMP_TMPMNTFREELIST      0x0001  /* Vnode is on mnt's tmp free list */
+
 /*
  * Vnode attributes.  A field value of VNOVAL represents a field whose value
  * is unavailable (getattr) or which is not to be changed (setattr).