Author: mjg
Date: Fri Jan 24 07:44:25 2020
New Revision: 357070
URL: https://svnweb.freebsd.org/changeset/base/357070

Log:
  vfs: stop unlocking the vnode upfront in vput
  
  Doing so runs into races with filesystems which make half-constructed vnodes
  visible to other users, while depending on the chain vput -> vinactive ->
  vrecycle to be executed without dropping the vnode lock.
  
  Impediments to making this work have been cleared up (notably, vop_unlock_post
  no longer does anything, and lockmgr stops touching the lock after the final
  write). Stacked filesystems keep vhold/vdrop across unlock, which arguably can
  now be eliminated.
  
  Reviewed by:  jeff
  Differential Revision:        https://reviews.freebsd.org/D23344

Modified:
  head/sys/kern/vfs_subr.c

Modified: head/sys/kern/vfs_subr.c
==============================================================================
--- head/sys/kern/vfs_subr.c    Fri Jan 24 07:42:57 2020        (r357069)
+++ head/sys/kern/vfs_subr.c    Fri Jan 24 07:44:25 2020        (r357070)
@@ -3088,6 +3088,11 @@ enum vputx_op { VPUTX_VRELE, VPUTX_VPUT, VPUTX_VUNREF 
  * Decrement the use and hold counts for a vnode.
  *
  * See an explanation near vget() as to why atomic operation is safe.
+ *
+ * XXX Some filesystems pass in an exclusively locked vnode and strongly depend
+ * on the lock being held all the way until VOP_INACTIVE. This in particular
+ * happens with UFS which adds half-constructed vnodes to the hash, where they
+ * can be found by other code.
  */
 static void
 vputx(struct vnode *vp, enum vputx_op func)
@@ -3097,6 +3102,8 @@ vputx(struct vnode *vp, enum vputx_op func)
        KASSERT(vp != NULL, ("vputx: null vp"));
        if (func == VPUTX_VUNREF)
                ASSERT_VOP_LOCKED(vp, "vunref");
+       else if (func == VPUTX_VPUT)
+               ASSERT_VOP_LOCKED(vp, "vput");
        ASSERT_VI_UNLOCKED(vp, __func__);
        VNASSERT(vp->v_holdcnt > 0 && vp->v_usecount > 0, vp,
            ("%s: wrong ref counts", __func__));
@@ -3112,22 +3119,19 @@ vputx(struct vnode *vp, enum vputx_op func)
         * count which provides liveness of the vnode, in which case we
         * have to vdrop.
         */
-       if (!refcount_release(&vp->v_usecount))
+       if (!refcount_release(&vp->v_usecount)) {
+               if (func == VPUTX_VPUT)
+                       VOP_UNLOCK(vp);
                return;
+       }
        VI_LOCK(vp);
        v_decr_devcount(vp);
        /*
         * By the time we got here someone else might have transitioned
         * the count back to > 0.
         */
-       if (vp->v_usecount > 0) {
-               vdropl(vp);
-               return;
-       }
-       if (vp->v_iflag & VI_DOINGINACT) {
-               vdropl(vp);
-               return;
-       }
+       if (vp->v_usecount > 0 || vp->v_iflag & VI_DOINGINACT)
+               goto out;
 
        /*
         * Check if the fs wants to perform inactive processing. Note we
@@ -3137,10 +3141,8 @@ vputx(struct vnode *vp, enum vputx_op func)
         * here but to drop our hold count.
         */
        if (__predict_false(VN_IS_DOOMED(vp)) ||
-           VOP_NEED_INACTIVE(vp) == 0) {
-               vdropl(vp);
-               return;
-       }
+           VOP_NEED_INACTIVE(vp) == 0)
+               goto out;
 
        /*
         * We must call VOP_INACTIVE with the node locked. Mark
@@ -3153,8 +3155,12 @@ vputx(struct vnode *vp, enum vputx_op func)
                VI_LOCK(vp);
                break;
        case VPUTX_VPUT:
-               error = VOP_LOCK(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_NOWAIT);
-               VI_LOCK(vp);
+               error = 0;
+               if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
+                       error = VOP_LOCK(vp, LK_UPGRADE | LK_INTERLOCK |
+                           LK_NOWAIT);
+                       VI_LOCK(vp);
+               }
                break;
        case VPUTX_VUNREF:
                error = 0;
@@ -3177,6 +3183,11 @@ vputx(struct vnode *vp, enum vputx_op func)
        } else {
                vdropl(vp);
        }
+       return;
+out:
+       if (func == VPUTX_VPUT)
+               VOP_UNLOCK(vp);
+       vdropl(vp);
 }
 
 /*
@@ -3194,21 +3205,11 @@ vrele(struct vnode *vp)
  * Release an already locked vnode.  This give the same effects as
  * unlock+vrele(), but takes less time and avoids releasing and
  * re-aquiring the lock (as vrele() acquires the lock internally.)
- *
- * It is an invariant that all VOP_* calls operate on a held vnode.
- * We may be only having an implicit hold stemming from our usecount,
- * which we are about to release. If we unlock the vnode afterwards we
- * open a time window where someone else dropped the last usecount and
- * proceeded to free the vnode before our unlock finished. For this
- * reason we unlock the vnode early. This is a little bit wasteful as
- * it may be the vnode is exclusively locked and inactive processing is
- * needed, in which case we are adding work.
  */
 void
 vput(struct vnode *vp)
 {
 
-       VOP_UNLOCK(vp);
        vputx(vp, VPUTX_VPUT);
 }
 
_______________________________________________
[email protected] mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "[email protected]"

Reply via email to