Ok, I'm adding -hackers... another email thread got going in -committers.

    Here is a patch set for -stable.  It isn't perfect but it does appear
    to solve the problem.  The one case I don't handle right is if you have
    a hardlinked file and two renames in two different directories on the same
    file occur at the same time... that will (improperly) return an error 
    code when, in fact, it's perfectly acceptable to do that.

    This patch appears to fix the utterly trivial crash reproduction that
    Yevgeniy was able to produce with a couple of simple race scripts running
    in the background.

    What I've done is add a SOFTLOCKLEAF capability to namei().  If set, and
    the file/directory exists, namei() will generate an extra VREF() on 
    the vnode and set the VSOFTLOCK flag in vp->v_flag.  If the vnode already
    has VSOFTLOCK set, namei() will return EINVAL.

    Then in rename() and rmdir() I set SOFTLOCKLEAF for the namei resolution
    and, of course, clean things up when everything is done.

    The ufs_rename() and ufs_rmdir() code no longer have to do the IN_RENAME
    hack at all, because it's now handled.

    This patch set does not yet include fixes to unionfs or the nfs server
    and is for informational purposes only.  Comments?

                                                -Matt

Index: kern/vfs_lookup.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/vfs_lookup.c,v
retrieving revision 1.38.2.3
diff -u -r1.38.2.3 vfs_lookup.c
--- kern/vfs_lookup.c   2001/08/31 19:36:49     1.38.2.3
+++ kern/vfs_lookup.c   2001/10/02 19:04:21
@@ -372,11 +372,20 @@
                        error = EISDIR;
                        goto bad;
                }
+               if ((cnp->cn_flags & SOFTLOCKLEAF) &&
+                   (dp->v_flag & VSOFTLOCK)) {
+                       error = EINVAL;
+                       goto bad;
+               }
                if (wantparent) {
                        ndp->ni_dvp = dp;
                        VREF(dp);
                }
                ndp->ni_vp = dp;
+               if (cnp->cn_flags & SOFTLOCKLEAF) {
+                       VREF(dp);
+                       vsetsoftlock(dp);
+               }
                if (!(cnp->cn_flags & (LOCKPARENT | LOCKLEAF)))
                        VOP_UNLOCK(dp, 0, p);
                /* XXX This should probably move to the top of function. */
@@ -565,13 +574,20 @@
                error = EROFS;
                goto bad2;
        }
+       if ((cnp->cn_flags & SOFTLOCKLEAF) && (dp->v_flag & VSOFTLOCK)) {
+               error = EINVAL;
+               goto bad2;
+       }
        if (cnp->cn_flags & SAVESTART) {
                ndp->ni_startdir = ndp->ni_dvp;
                VREF(ndp->ni_startdir);
        }
        if (!wantparent)
                vrele(ndp->ni_dvp);
-
+       if (cnp->cn_flags & SOFTLOCKLEAF) {
+               VREF(dp);
+               vsetsoftlock(dp);
+       }
        if ((cnp->cn_flags & LOCKLEAF) == 0)
                VOP_UNLOCK(dp, 0, p);
        return (0);
@@ -654,6 +670,15 @@
                        error = ENOTDIR;
                        goto bad;
                }
+               if ((cnp->cn_flags & SOFTLOCKLEAF) &&
+                   (dp->v_flag & VSOFTLOCK)) {
+                       error = EINVAL;
+                       goto bad;
+               }
+               if (cnp->cn_flags & SOFTLOCKLEAF) {
+                       VREF(dp);
+                       vsetsoftlock(dp);
+               }
                if (!(cnp->cn_flags & LOCKLEAF))
                        VOP_UNLOCK(dp, 0, p);
                *vpp = dp;
@@ -707,6 +732,10 @@
                error = EROFS;
                goto bad2;
        }
+       if ((cnp->cn_flags & SOFTLOCKLEAF) && (dp->v_flag & VSOFTLOCK)) {
+               error = EINVAL;
+               goto bad2;
+       }
        /* ASSERT(dvp == ndp->ni_startdir) */
        if (cnp->cn_flags & SAVESTART)
                VREF(dvp);
@@ -718,6 +747,10 @@
                ((cnp->cn_flags & (NOOBJ|LOCKLEAF)) == LOCKLEAF))
                vfs_object_create(dp, cnp->cn_proc, cnp->cn_cred);
 
+       if (cnp->cn_flags & SOFTLOCKLEAF) {
+               VREF(dp);
+               vsetsoftlock(dp);
+       }
        if ((cnp->cn_flags & LOCKLEAF) == 0)
                VOP_UNLOCK(dp, 0, p);
        return (0);
Index: kern/vfs_subr.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/vfs_subr.c,v
retrieving revision 1.249.2.11
diff -u -r1.249.2.11 vfs_subr.c
--- kern/vfs_subr.c     2001/09/11 09:49:53     1.249.2.11
+++ kern/vfs_subr.c     2001/10/02 18:45:55
@@ -2927,6 +2961,12 @@
      struct nameidata *ndp;
      const uint flags;
 {
+       if (!(flags & NDF_NO_FREE_SOFTLOCKLEAF) &&
+           (ndp->ni_cnd.cn_flags & SOFTLOCKLEAF) &&
+           ndp->ni_vp) {
+               vclearsoftlock(ndp->ni_vp);
+               vrele(ndp->ni_vp);
+       }
        if (!(flags & NDF_NO_FREE_PNBUF) &&
            (ndp->ni_cnd.cn_flags & HASBUF)) {
                zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
@@ -2955,3 +2995,24 @@
                ndp->ni_startdir = NULL;
        }
 }
+
+void
+vsetsoftlock(struct vnode *vp)
+{
+       int s;
+
+       s = splbio();
+       vp->v_flag |= VSOFTLOCK;
+       splx(s);
+}
+
+void
+vclearsoftlock(struct vnode *vp)
+{
+       int s;
+
+       s = splbio();
+       vp->v_flag &= ~VSOFTLOCK;
+       splx(s);
+}
+
Index: kern/vfs_syscalls.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/vfs_syscalls.c,v
retrieving revision 1.151.2.9
diff -u -r1.151.2.9 vfs_syscalls.c
--- kern/vfs_syscalls.c 2001/08/12 10:48:00     1.151.2.9
+++ kern/vfs_syscalls.c 2001/10/02 19:18:06
@@ -2633,8 +2633,8 @@
        int error;
 
        bwillwrite();
-       NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
-           SCARG(uap, from), p);
+       NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | SOFTLOCKLEAF,
+           UIO_USERSPACE, SCARG(uap, from), p);
        if ((error = namei(&fromnd)) != 0)
                return (error);
        fvp = fromnd.ni_vp;
@@ -2646,7 +2646,7 @@
                /* Translate error code for rename("dir1", "dir2/."). */
                if (error == EISDIR && fvp->v_type == VDIR)
                        error = EINVAL;
-               NDFREE(&fromnd, NDF_ONLY_PNBUF);
+               NDFREE(&fromnd, NDF_ONLY_PNBUF & NDF_ONLY_SOFTLOCKLEAF);
                vrele(fromnd.ni_dvp);
                vrele(fvp);
                goto out1;
@@ -2683,12 +2683,14 @@
                if (tvp) {
                        VOP_LEASE(tvp, p, p->p_ucred, LEASE_WRITE);
                }
+               fromnd.ni_cnd.cn_flags &= ~SOFTLOCKLEAF;        /* XXX hack */
                error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
                                   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
-               NDFREE(&fromnd, NDF_ONLY_PNBUF);
+               fromnd.ni_cnd.cn_flags |= SOFTLOCKLEAF;
+               NDFREE(&fromnd, NDF_ONLY_PNBUF & NDF_ONLY_SOFTLOCKLEAF);
                NDFREE(&tond, NDF_ONLY_PNBUF);
        } else {
-               NDFREE(&fromnd, NDF_ONLY_PNBUF);
+               NDFREE(&fromnd, NDF_ONLY_PNBUF & NDF_ONLY_SOFTLOCKLEAF);
                NDFREE(&tond, NDF_ONLY_PNBUF);
                if (tdvp == tvp)
                        vrele(tdvp);
@@ -2785,8 +2787,8 @@
        struct nameidata nd;
 
        bwillwrite();
-       NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
-           SCARG(uap, path), p);
+       NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | SOFTLOCKLEAF,
+           UIO_USERSPACE, SCARG(uap, path), p);
        if ((error = namei(&nd)) != 0)
                return (error);
        vp = nd.ni_vp;
@@ -2812,7 +2814,7 @@
                error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
        }
 out:
-       NDFREE(&nd, NDF_ONLY_PNBUF);
+       NDFREE(&nd, NDF_ONLY_PNBUF & NDF_ONLY_SOFTLOCKLEAF);
        if (nd.ni_dvp == vp)
                vrele(nd.ni_dvp);
        else
Index: sys/namei.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/namei.h,v
retrieving revision 1.29.2.2
diff -u -r1.29.2.2 namei.h
--- sys/namei.h 2001/09/30 21:12:54     1.29.2.2
+++ sys/namei.h 2001/10/02 18:43:34
@@ -114,7 +114,7 @@
 #define        FOLLOW          0x0040  /* follow symbolic links */
 #define        NOOBJ           0x0080  /* don't create object */
 #define        NOFOLLOW        0x0000  /* do not follow symbolic links (pseudo) */
-#define        MODMASK         0x00fc  /* mask of operational modifiers */
+#define        MODMASK         (0x00fc|SOFTLOCKLEAF)
 /*
  * Namei parameter descriptors.
  *
@@ -143,7 +143,7 @@
 #define        WILLBEDIR       0x080000 /* new files will be dirs; allow trailing / */
 #define        ISUNICODE       0x100000 /* current component name is unicode*/
 #define        PDIRUNLOCK      0x200000 /* file system lookup() unlocked parent dir */
-#define PARAMASK       0x1fff00 /* mask of parameter descriptors */
+#define SOFTLOCKLEAF   0x400000 /* set VSOFTLOCK in vnode */
 /*
  * Initialization of an nameidata structure.
  */
@@ -180,7 +180,9 @@
 #define NDF_NO_VP_PUT          0x0000000c
 #define NDF_NO_STARTDIR_RELE   0x00000010
 #define NDF_NO_FREE_PNBUF      0x00000020
+#define NDF_NO_FREE_SOFTLOCKLEAF 0x00000040
 #define NDF_ONLY_PNBUF         (~NDF_NO_FREE_PNBUF)
+#define NDF_ONLY_SOFTLOCKLEAF  (~NDF_NO_FREE_SOFTLOCKLEAF)
 
 void NDFREE __P((struct nameidata *, const uint));
 
Index: sys/vnode.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/vnode.h,v
retrieving revision 1.111.2.12
diff -u -r1.111.2.12 vnode.h
--- sys/vnode.h 2001/09/22 09:21:48     1.111.2.12
+++ sys/vnode.h 2001/10/02 18:46:10
@@ -169,6 +169,7 @@
 #define        VTBFREE         0x100000 /* This vnode is on the to-be-freelist */
 #define        VONWORKLST      0x200000 /* On syncer work-list */
 #define        VMOUNT          0x400000 /* Mount in progress */
+#define VSOFTLOCK      0x800000 /* Soft lock on vnode (directory entry) */
 
 /*
  * Vnode attributes.  A field value of VNOVAL represents a field whose value
@@ -630,6 +632,8 @@
 void   vrele __P((struct vnode *vp));
 void   vref __P((struct vnode *vp));
 void   vbusy __P((struct vnode *vp));
+void   vsetsoftlock __P((struct vnode *vp));
+void   vclearsoftlock __P((struct vnode *vp));
 
 extern vop_t   **default_vnodeop_p;
 extern vop_t **spec_vnodeop_p;
Index: ufs/ufs/ufs_vnops.c
===================================================================
RCS file: /home/ncvs/src/sys/ufs/ufs/ufs_vnops.c,v
retrieving revision 1.131.2.4
diff -u -r1.131.2.4 ufs_vnops.c
--- ufs/ufs/ufs_vnops.c 2001/08/28 17:28:49     1.131.2.4
+++ ufs/ufs/ufs_vnops.c 2001/10/02 19:16:40
@@ -997,13 +997,11 @@
                 * Avoid ".", "..", and aliases of "." for obvious reasons.
                 */
                if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
-                   dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT ||
-                   (ip->i_flag & IN_RENAME)) {
+                   dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) {
                        VOP_UNLOCK(fvp, 0, p);
                        error = EINVAL;
                        goto abortit;
                }
-               ip->i_flag |= IN_RENAME;
                oldparent = dp->i_number;
                doingdirectory = 1;
        }
@@ -1224,13 +1222,12 @@
                return (0);
        }
        /*
-        * Ensure that the directory entry still exists and has not
-        * changed while the new name has been entered. If the source is
-        * a file then the entry may have been unlinked or renamed. In
-        * either case there is no further work to be done. If the source
-        * is a directory then it cannot have been rmdir'ed; the IN_RENAME
-        * flag ensures that it cannot be moved by another rename or removed
-        * by a rmdir.
+        * The source has been VSOFTLOCKd by the caller, preventing rename
+        * or rmdir, but not prevent unlink.  At the moment we allow the unlink
+        * race and so do not panic if relookup fails or the inodes do not
+        * match, because files can be hardlinked and our flag is on the vnode
+        * rather then the directory entry in the namei cache.  But source
+        * directories must still exist.
         */
        if (xp != ip) {
                if (doingdirectory)
@@ -1248,7 +1245,6 @@
                        cache_purge(fdvp);
                }
                error = ufs_dirremove(fdvp, xp, fcnp->cn_flags, 0);
-               xp->i_flag &= ~IN_RENAME;
        }
        VN_KNOTE(fvp, NOTE_RENAME);
        if (dp)
@@ -1263,13 +1259,10 @@
                vput(ITOV(xp));
        vput(ITOV(dp));
 out:
-       if (doingdirectory)
-               ip->i_flag &= ~IN_RENAME;
        if (vn_lock(fvp, LK_EXCLUSIVE, p) == 0) {
                ip->i_effnlink--;
                ip->i_nlink--;
                ip->i_flag |= IN_CHANGE;
-               ip->i_flag &= ~IN_RENAME;
                if (DOINGSOFTDEP(fvp))
                        softdep_change_linkcnt(ip);
                vput(fvp);
@@ -1503,18 +1496,16 @@
        dp = VTOI(dvp);
 
        /*
-        * Do not remove a directory that is in the process of being renamed.
         * Verify the directory is empty (and valid). Rmdir ".." will not be
         * valid since ".." will contain a reference to the current directory
         * and thus be non-empty. Do not allow the removal of mounted on
         * directories (this can happen when an NFS exported filesystem
         * tries to remove a locally mounted on directory).
+        *
+        * The syscall wrapper already protects us from attempting to remove
+        * a directory that is undergoing a rename.
         */
        error = 0;
-       if (ip->i_flag & IN_RENAME) {
-               error = EINVAL;
-               goto out;
-       }
        if (ip->i_effnlink != 2 ||
            !ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) {
                error = ENOTEMPTY;

To Unsubscribe: send mail to [EMAIL PROTECTED]
with "unsubscribe freebsd-hackers" in the body of the message

Reply via email to