Author: kib
Date: Fri Nov 13 09:42:32 2020
New Revision: 367632
URL: https://svnweb.freebsd.org/changeset/base/367632

Log:
  Allow some VOPs to return ERELOOKUP to indicate VFS operation restart at
  top level.
  
  Restart syscalls and some sync operations when the filesystem indicates
  an ERELOOKUP condition, mostly for VOPs operating on metadata.  In
  particular, lookup results cached in the inode/v_data are no longer
  valid and need recalculating.  Right now this should be a nop.
  
  Assert that ERELOOKUP is caught everywhere and not returned to
  userspace, by asserting that td_errno != ERELOOKUP on the syscall
  return path.
  
  In collaboration with:        pho
  Reviewed by:  mckusick (previous version), markj
  Tested by:    markj (syzkaller), pho
  Sponsored by: The FreeBSD Foundation
  Differential revision:        https://reviews.freebsd.org/D26136

Modified:
  head/sys/kern/subr_syscall.c
  head/sys/kern/uipc_usrreq.c
  head/sys/kern/vfs_subr.c
  head/sys/kern/vfs_syscalls.c

Modified: head/sys/kern/subr_syscall.c
==============================================================================
--- head/sys/kern/subr_syscall.c        Fri Nov 13 09:31:57 2020        (r367631)
+++ head/sys/kern/subr_syscall.c        Fri Nov 13 09:42:32 2020        (r367632)
@@ -217,6 +217,8 @@ syscallret(struct thread *td)
 
        KASSERT((td->td_pflags & TDP_FORKING) == 0,
            ("fork() did not clear TDP_FORKING upon completion"));
+       KASSERT(td->td_errno != ERELOOKUP,
+           ("ERELOOKUP not consumed syscall %d", td->td_sa.code));
 
        p = td->td_proc;
        sa = &td->td_sa;

Modified: head/sys/kern/uipc_usrreq.c
==============================================================================
--- head/sys/kern/uipc_usrreq.c Fri Nov 13 09:31:57 2020        (r367631)
+++ head/sys/kern/uipc_usrreq.c Fri Nov 13 09:42:32 2020        (r367632)
@@ -671,6 +671,8 @@ restart:
        vput(nd.ni_dvp);
        if (error) {
                vn_finished_write(mp);
+               if (error == ERELOOKUP)
+                       goto restart;
                goto error;
        }
        vp = nd.ni_vp;

Modified: head/sys/kern/vfs_subr.c
==============================================================================
--- head/sys/kern/vfs_subr.c    Fri Nov 13 09:31:57 2020        (r367631)
+++ head/sys/kern/vfs_subr.c    Fri Nov 13 09:42:32 2020        (r367632)
@@ -1937,7 +1937,10 @@ bufobj_invalbuf(struct bufobj *bo, int flags, int slpf
                }
                if (bo->bo_dirty.bv_cnt > 0) {
                        BO_UNLOCK(bo);
-                       if ((error = BO_SYNC(bo, MNT_WAIT)) != 0)
+                       do {
+                               error = BO_SYNC(bo, MNT_WAIT);
+                       } while (error == ERELOOKUP);
+                       if (error != 0)
                                return (error);
                        /*
                         * XXX We could save a lock/unlock if this was only
@@ -3678,7 +3681,9 @@ loop:
                                vm_object_page_clean(vp->v_object, 0, 0, 0);
                                VM_OBJECT_WUNLOCK(vp->v_object);
                        }
-                       error = VOP_FSYNC(vp, MNT_WAIT, td);
+                       do {
+                               error = VOP_FSYNC(vp, MNT_WAIT, td);
+                       } while (error == ERELOOKUP);
                        if (error != 0) {
                                VOP_UNLOCK(vp);
                                vdrop(vp);

Modified: head/sys/kern/vfs_syscalls.c
==============================================================================
--- head/sys/kern/vfs_syscalls.c        Fri Nov 13 09:31:57 2020        (r367631)
+++ head/sys/kern/vfs_syscalls.c        Fri Nov 13 09:42:32 2020        (r367632)
@@ -1384,6 +1384,8 @@ restart:
        NDFREE(&nd, NDF_ONLY_PNBUF);
        vput(nd.ni_dvp);
        vn_finished_write(mp);
+       if (error == ERELOOKUP)
+               goto restart;
        return (error);
 }
 
@@ -1470,6 +1472,8 @@ out:
        vput(nd.ni_dvp);
        vn_finished_write(mp);
        NDFREE(&nd, NDF_ONLY_PNBUF);
+       if (error == ERELOOKUP)
+               goto restart;
        return (error);
 }
 
@@ -1568,7 +1572,7 @@ kern_linkat(struct thread *td, int fd1, int fd2, const
                        return (error);
                NDFREE(&nd, NDF_ONLY_PNBUF);
                error = kern_linkat_vp(td, nd.ni_vp, fd2, path2, segflag);
-       } while (error ==  EAGAIN);
+       } while (error ==  EAGAIN || error == ERELOOKUP);
        return (error);
 }
 
@@ -1741,6 +1745,8 @@ out2:
        NDFREE(&nd, NDF_ONLY_PNBUF);
        vput(nd.ni_dvp);
        vn_finished_write(mp);
+       if (error == ERELOOKUP)
+               goto restart;
 out:
        if (segflg != UIO_SYSSPACE)
                uma_zfree(namei_zone, tmppath);
@@ -1791,6 +1797,8 @@ restart:
        NDFREE(&nd, NDF_ONLY_PNBUF);
        vput(nd.ni_dvp);
        vn_finished_write(mp);
+       if (error == ERELOOKUP)
+               goto restart;
        return (error);
 }
 
@@ -1937,6 +1945,8 @@ out:
                vrele(vp);
        else
                vput(vp);
+       if (error == ERELOOKUP)
+               goto restart;
 fdout:
        if (fp != NULL)
                fdrop(fp, td);
@@ -3395,7 +3405,8 @@ kern_truncate(struct thread *td, const char *path, enu
        int error;
 
        if (length < 0)
-               return(EINVAL);
+               return (EINVAL);
+retry:
        NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td);
        if ((error = namei(&nd)) != 0)
                return (error);
@@ -3424,6 +3435,8 @@ kern_truncate(struct thread *td, const char *path, enu
        vn_finished_write(mp);
        vn_rangelock_unlock(vp, rl_cookie);
        vrele(vp);
+       if (error == ERELOOKUP)
+               goto retry;
        return (error);
 }
 
@@ -3479,6 +3492,7 @@ kern_fsync(struct thread *td, int fd, bool fullsync)
        if (!fullsync)
                /* XXXKIB: compete outstanding aio writes */;
 #endif
+retry:
        error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
        if (error != 0)
                goto drop;
@@ -3498,6 +3512,8 @@ kern_fsync(struct thread *td, int fd, bool fullsync)
        error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td);
        VOP_UNLOCK(vp);
        vn_finished_write(mp);
+       if (error == ERELOOKUP)
+               goto retry;
 drop:
        fdrop(fp, td);
        return (error);
@@ -3679,7 +3695,7 @@ again:
         * are links to the same vnode), then there is nothing to do.
         */
        if (fvp == tvp)
-               error = -1;
+               error = ERESTART;
 #ifdef MAC
        else
                error = mac_vnode_check_rename_to(td->td_ucred, tdvp,
@@ -3708,8 +3724,10 @@ out:
 out1:
        if (fromnd.ni_startdir)
                vrele(fromnd.ni_startdir);
-       if (error == -1)
+       if (error == ERESTART)
                return (0);
+       if (error == ERELOOKUP)
+               goto again;
        return (error);
 }
 
@@ -3803,6 +3821,8 @@ out:
        if (error == 0)
                vput(nd.ni_vp);
        vn_finished_write(mp);
+       if (error == ERELOOKUP)
+               goto restart;
        return (error);
 }
 
@@ -3903,6 +3923,8 @@ out:
                vrele(nd.ni_dvp);
        else
                vput(nd.ni_dvp);
+       if (error == ERELOOKUP)
+               goto restart;
 fdout:
        if (fp != NULL)
                fdrop(fp, td);
@@ -4416,7 +4438,8 @@ kern_fhlinkat(struct thread *td, int fd, const char *p
                if (error != 0)
                        return (error);
                VOP_UNLOCK(vp);
-       } while ((error = kern_linkat_vp(td, vp, fd, path, pathseg)) == EAGAIN);
+               error = kern_linkat_vp(td, vp, fd, path, pathseg);
+       } while (error == EAGAIN || error == ERELOOKUP);
        return (error);
 }
 
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to