Author: avg
Date: Thu Aug 11 20:48:03 2016
New Revision: 303970
URL: https://svnweb.freebsd.org/changeset/base/303970

Log:
  MFC r303763,303791,303869: zfs: honour and make use of vfs vnode locking 
protocol
  
  ZFS POSIX Layer was originally written for Solaris VFS which is very
  different from FreeBSD VFS.  Most importantly many things that FreeBSD VFS
  manages on behalf of all filesystems are implemented in ZPL in a different
  way.
  Thus, ZPL contains code that is redundant on FreeBSD or duplicates VFS
  functionality or, in the worst cases, badly interacts / interferes
  with VFS.
  
  The most prominent problem is a deadlock caused by the lock order reversal
  of vnode locks that may happen with concurrent zfs_rename() and lookup().
  The deadlock is a result of zfs_rename() not observing the vnode locking
  contract expected by VFS.
  
  This commit removes all ZPL internal locking that protects parent-child
  relationships of filesystem nodes.  These relationships are protected
  by vnode locks and the code is changed to take advantage of that fact
  and to properly interact with VFS.
  
  Removal of the internal locking allowed all ZPL dmu_tx_assign calls to
  use TXG_WAIT mode.
  
  Another victim, disputable perhaps, is ZFS support for filesystems with
  mixed case sensitivity.  That support is not provided by the OS anyway,
  so in ZFS it was a bunch of dead code.
  
  To do:
  - replace ZFS_ENTER mechanism with VFS managed / visible mechanism
  - replace zfs_zget with zfs_vget[f] as much as possible
  - get rid of the zfs_freebsd_* adapters, which are no longer really useful
  - more cleanups of unneeded / unused code
  - fix / replace .zfs support
  
  PR:           209158
  Approved by:  re (gjb)

Modified:
  stable/11/sys/cddl/compat/opensolaris/sys/vnode.h
  stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_dir.h
  stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h
  stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h
  stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c
  stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c
  stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_sa.c
  stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
  stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
  stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/sys/cddl/compat/opensolaris/sys/vnode.h
==============================================================================
--- stable/11/sys/cddl/compat/opensolaris/sys/vnode.h   Thu Aug 11 20:19:02 
2016        (r303969)
+++ stable/11/sys/cddl/compat/opensolaris/sys/vnode.h   Thu Aug 11 20:48:03 
2016        (r303970)
@@ -87,8 +87,6 @@ vn_is_readonly(vnode_t *vp)
 #define        VN_RELE(v)      vrele(v)
 #define        VN_URELE(v)     vput(v)
 
-#define        VOP_REALVP(vp, vpp, ct) (*(vpp) = (vp), 0)
-
 #define        vnevent_create(vp, ct)                  do { } while (0)
 #define        vnevent_link(vp, ct)                    do { } while (0)
 #define        vnevent_remove(vp, dvp, name, ct)       do { } while (0)

Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_dir.h
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_dir.h      
Thu Aug 11 20:19:02 2016        (r303969)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_dir.h      
Thu Aug 11 20:48:03 2016        (r303970)
@@ -48,18 +48,18 @@ extern "C" {
 #define        IS_ROOT_NODE    0x01            /* create a root node */
 #define        IS_XATTR        0x02            /* create an extended attribute 
node */
 
-extern int zfs_dirent_lock(zfs_dirlock_t **, znode_t *, char *, znode_t **,
-    int, int *, pathname_t *);
-extern void zfs_dirent_unlock(zfs_dirlock_t *);
-extern int zfs_link_create(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int);
-extern int zfs_link_destroy(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int,
+extern int zfs_dirent_lookup(znode_t *, const char *, znode_t **, int);
+extern int zfs_link_create(znode_t *, const char *, znode_t *, dmu_tx_t *, 
int);
+extern int zfs_link_destroy(znode_t *, const char *, znode_t *, dmu_tx_t *, 
int,
     boolean_t *);
-extern int zfs_dirlook(znode_t *, char *, vnode_t **, int, int *,
-    pathname_t *);
+#if 0
+extern int zfs_dirlook(vnode_t *, const char *, vnode_t **, int);
+#else
+extern int zfs_dirlook(znode_t *, const char *name, znode_t **);
+#endif
 extern void zfs_mknode(znode_t *, vattr_t *, dmu_tx_t *, cred_t *,
     uint_t, znode_t **, zfs_acl_ids_t *);
 extern void zfs_rmnode(znode_t *);
-extern void zfs_dl_name_switch(zfs_dirlock_t *dl, char *new, char **old);
 extern boolean_t zfs_dirempty(znode_t *);
 extern void zfs_unlinked_add(znode_t *, dmu_tx_t *);
 extern void zfs_unlinked_drain(zfsvfs_t *zfsvfs);

Modified: 
stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h   
Thu Aug 11 20:19:02 2016        (r303969)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h   
Thu Aug 11 20:48:03 2016        (r303970)
@@ -75,6 +75,7 @@ struct zfsvfs {
        boolean_t       z_use_fuids;    /* version allows fuids */
        boolean_t       z_replay;       /* set during ZIL replay */
        boolean_t       z_use_sa;       /* version allow system attributes */
+       boolean_t       z_use_namecache;/* make use of FreeBSD name cache */
        uint64_t        z_version;      /* ZPL version */
        uint64_t        z_shares_dir;   /* hidden shares dir */
        kmutex_t        z_lock;

Modified: 
stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h    
Thu Aug 11 20:19:02 2016        (r303969)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h    
Thu Aug 11 20:48:03 2016        (r303970)
@@ -181,10 +181,12 @@ typedef struct znode {
        struct zfsvfs   *z_zfsvfs;
        vnode_t         *z_vnode;
        uint64_t        z_id;           /* object ID for this znode */
+#ifdef illumos
        kmutex_t        z_lock;         /* znode modification lock */
        krwlock_t       z_parent_lock;  /* parent lock for directories */
        krwlock_t       z_name_lock;    /* "master" lock for dirent locks */
        zfs_dirlock_t   *z_dirlocks;    /* directory entry lock list */
+#endif
        kmutex_t        z_range_lock;   /* protects changes to z_range_avl */
        avl_tree_t      z_range_avl;    /* avl tree of file range locks */
        uint8_t         z_unlinked;     /* file has been unlinked */

Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c  Thu Aug 
11 20:19:02 2016        (r303969)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c  Thu Aug 
11 20:48:03 2016        (r303970)
@@ -1055,8 +1055,7 @@ zfs_mode_compute(uint64_t fmode, zfs_acl
  * create a new acl and leave any cached acl in place.
  */
 static int
-zfs_acl_node_read(znode_t *zp, boolean_t have_lock, zfs_acl_t **aclpp,
-    boolean_t will_modify)
+zfs_acl_node_read(znode_t *zp, zfs_acl_t **aclpp, boolean_t will_modify)
 {
        zfs_acl_t       *aclp;
        int             aclsize;
@@ -1065,26 +1064,15 @@ zfs_acl_node_read(znode_t *zp, boolean_t
        zfs_acl_phys_t  znode_acl;
        int             version;
        int             error;
-       boolean_t       drop_lock = B_FALSE;
 
        ASSERT(MUTEX_HELD(&zp->z_acl_lock));
+       ASSERT_VOP_LOCKED(ZTOV(zp), __func__);
 
        if (zp->z_acl_cached && !will_modify) {
                *aclpp = zp->z_acl_cached;
                return (0);
        }
 
-       /*
-        * close race where znode could be upgrade while trying to
-        * read the znode attributes.
-        *
-        * But this could only happen if the file isn't already an SA
-        * znode
-        */
-       if (!zp->z_is_sa && !have_lock) {
-               mutex_enter(&zp->z_lock);
-               drop_lock = B_TRUE;
-       }
        version = zfs_znode_acl_version(zp);
 
        if ((error = zfs_acl_znode_info(zp, &aclsize,
@@ -1130,8 +1118,6 @@ zfs_acl_node_read(znode_t *zp, boolean_t
        if (!will_modify)
                zp->z_acl_cached = aclp;
 done:
-       if (drop_lock)
-               mutex_exit(&zp->z_lock);
        return (error);
 }
 
@@ -1158,10 +1144,10 @@ zfs_acl_chown_setattr(znode_t *zp)
        int error;
        zfs_acl_t *aclp;
 
-       ASSERT(MUTEX_HELD(&zp->z_lock));
+       ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
        ASSERT(MUTEX_HELD(&zp->z_acl_lock));
 
-       if ((error = zfs_acl_node_read(zp, B_TRUE, &aclp, B_FALSE)) == 0)
+       if ((error = zfs_acl_node_read(zp, &aclp, B_FALSE)) == 0)
                zp->z_mode = zfs_mode_compute(zp->z_mode, aclp,
                    &zp->z_pflags, zp->z_uid, zp->z_gid);
        return (error);
@@ -1453,18 +1439,17 @@ zfs_acl_chmod_setattr(znode_t *zp, zfs_a
        int error = 0;
 
        mutex_enter(&zp->z_acl_lock);
-       mutex_enter(&zp->z_lock);
+       ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
        if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_DISCARD)
                *aclp = zfs_acl_alloc(zfs_acl_version_zp(zp));
        else
-               error = zfs_acl_node_read(zp, B_TRUE, aclp, B_TRUE);
+               error = zfs_acl_node_read(zp, aclp, B_TRUE);
 
        if (error == 0) {
                (*aclp)->z_hints = zp->z_pflags & V4_ACL_WIDE_FLAGS;
                zfs_acl_chmod(ZTOV(zp)->v_type, mode, B_TRUE,
                    (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_GROUPMASK), *aclp);
        }
-       mutex_exit(&zp->z_lock);
        mutex_exit(&zp->z_acl_lock);
 
        return (error);
@@ -1617,6 +1602,7 @@ zfs_acl_ids_create(znode_t *dzp, int fla
        boolean_t       trim = B_FALSE;
        boolean_t       inherited = B_FALSE;
 
+       ASSERT_VOP_ELOCKED(ZTOV(dzp), __func__);
        bzero(acl_ids, sizeof (zfs_acl_ids_t));
        acl_ids->z_mode = MAKEIMODE(vap->va_type, vap->va_mode);
 
@@ -1700,12 +1686,10 @@ zfs_acl_ids_create(znode_t *dzp, int fla
 
        if (acl_ids->z_aclp == NULL) {
                mutex_enter(&dzp->z_acl_lock);
-               mutex_enter(&dzp->z_lock);
                if (!(flag & IS_ROOT_NODE) &&
                    (dzp->z_pflags & ZFS_INHERIT_ACE) &&
                    !(dzp->z_pflags & ZFS_XATTR)) {
-                       VERIFY(0 == zfs_acl_node_read(dzp, B_TRUE,
-                           &paclp, B_FALSE));
+                       VERIFY(0 == zfs_acl_node_read(dzp, &paclp, B_FALSE));
                        acl_ids->z_aclp = zfs_acl_inherit(zfsvfs,
                            vap->va_type, paclp, acl_ids->z_mode);
                        inherited = B_TRUE;
@@ -1714,7 +1698,6 @@ zfs_acl_ids_create(znode_t *dzp, int fla
                            zfs_acl_alloc(zfs_acl_version_zp(dzp));
                        acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL;
                }
-               mutex_exit(&dzp->z_lock);
                mutex_exit(&dzp->z_acl_lock);
 
                if (vap->va_type == VDIR)
@@ -1783,7 +1766,8 @@ zfs_getacl(znode_t *zp, vsecattr_t *vsec
 
        mutex_enter(&zp->z_acl_lock);
 
-       error = zfs_acl_node_read(zp, B_FALSE, &aclp, B_FALSE);
+       ASSERT_VOP_LOCKED(ZTOV(zp), __func__);
+       error = zfs_acl_node_read(zp, &aclp, B_FALSE);
        if (error != 0) {
                mutex_exit(&zp->z_acl_lock);
                return (error);
@@ -1931,6 +1915,7 @@ zfs_setacl(znode_t *zp, vsecattr_t *vsec
        boolean_t       fuid_dirtied;
        uint64_t        acl_obj;
 
+       ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
        if (mask == 0)
                return (SET_ERROR(ENOSYS));
 
@@ -1955,7 +1940,6 @@ zfs_setacl(znode_t *zp, vsecattr_t *vsec
        }
 top:
        mutex_enter(&zp->z_acl_lock);
-       mutex_enter(&zp->z_lock);
 
        tx = dmu_tx_create(zfsvfs->z_os);
 
@@ -1987,7 +1971,6 @@ top:
        zfs_sa_upgrade_txholds(tx, zp);
        error = dmu_tx_assign(tx, TXG_NOWAIT);
        if (error) {
-               mutex_exit(&zp->z_lock);
                mutex_exit(&zp->z_acl_lock);
 
                if (error == ERESTART) {
@@ -2013,7 +1996,6 @@ top:
        if (fuidp)
                zfs_fuid_info_free(fuidp);
        dmu_tx_commit(tx);
-       mutex_exit(&zp->z_lock);
        mutex_exit(&zp->z_acl_lock);
 
        return (error);
@@ -2117,7 +2099,8 @@ zfs_zaccess_aces_check(znode_t *zp, uint
 
        mutex_enter(&zp->z_acl_lock);
 
-       error = zfs_acl_node_read(zp, B_FALSE, &aclp, B_FALSE);
+       ASSERT_VOP_LOCKED(ZTOV(zp), __func__);
+       error = zfs_acl_node_read(zp, &aclp, B_FALSE);
        if (error != 0) {
                mutex_exit(&zp->z_acl_lock);
                return (error);

Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c  Thu Aug 
11 20:19:02 2016        (r303969)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c  Thu Aug 
11 20:48:03 2016        (r303970)
@@ -58,96 +58,64 @@
 #include <sys/extdirent.h>
 
 /*
- * zfs_match_find() is used by zfs_dirent_lock() to peform zap lookups
+ * zfs_match_find() is used by zfs_dirent_lookup() to peform zap lookups
  * of names after deciding which is the appropriate lookup interface.
  */
 static int
-zfs_match_find(zfsvfs_t *zfsvfs, znode_t *dzp, char *name, boolean_t exact,
-    boolean_t update, int *deflags, pathname_t *rpnp, uint64_t *zoid)
+zfs_match_find(zfsvfs_t *zfsvfs, znode_t *dzp, const char *name,
+    boolean_t exact, uint64_t *zoid)
 {
        int error;
 
        if (zfsvfs->z_norm) {
-               matchtype_t mt = MT_FIRST;
-               boolean_t conflict = B_FALSE;
-               size_t bufsz = 0;
-               char *buf = NULL;
-
-               if (rpnp) {
-                       buf = rpnp->pn_buf;
-                       bufsz = rpnp->pn_bufsize;
-               }
-               if (exact)
-                       mt = MT_EXACT;
+               matchtype_t mt = exact? MT_EXACT : MT_FIRST;
+
                /*
                 * In the non-mixed case we only expect there would ever
                 * be one match, but we need to use the normalizing lookup.
                 */
                error = zap_lookup_norm(zfsvfs->z_os, dzp->z_id, name, 8, 1,
-                   zoid, mt, buf, bufsz, &conflict);
-               if (!error && deflags)
-                       *deflags = conflict ? ED_CASE_CONFLICT : 0;
+                   zoid, mt, NULL, 0, NULL);
        } else {
                error = zap_lookup(zfsvfs->z_os, dzp->z_id, name, 8, 1, zoid);
        }
        *zoid = ZFS_DIRENT_OBJ(*zoid);
 
-       if (error == ENOENT && update)
-               dnlc_update(ZTOV(dzp), name, DNLC_NO_VNODE);
-
        return (error);
 }
 
 /*
- * Lock a directory entry.  A dirlock on <dzp, name> protects that name
- * in dzp's directory zap object.  As long as you hold a dirlock, you can
- * assume two things: (1) dzp cannot be reaped, and (2) no other thread
- * can change the zap entry for (i.e. link or unlink) this name.
+ * Look up a directory entry under a locked vnode.
+ * dvp being locked gives us a guarantee that there are no concurrent
+ * modification of the directory and, thus, if a node can be found in
+ * the directory, then it must not be unlinked.
  *
  * Input arguments:
  *     dzp     - znode for directory
  *     name    - name of entry to lock
  *     flag    - ZNEW: if the entry already exists, fail with EEXIST.
  *               ZEXISTS: if the entry does not exist, fail with ENOENT.
- *               ZSHARED: allow concurrent access with other ZSHARED callers.
  *               ZXATTR: we want dzp's xattr directory
- *               ZCILOOK: On a mixed sensitivity file system,
- *                        this lookup should be case-insensitive.
- *               ZCIEXACT: On a purely case-insensitive file system,
- *                         this lookup should be case-sensitive.
- *               ZRENAMING: we are locking for renaming, force narrow locks
- *               ZHAVELOCK: Don't grab the z_name_lock for this call. The
- *                          current thread already holds it.
  *
  * Output arguments:
  *     zpp     - pointer to the znode for the entry (NULL if there isn't one)
- *     dlpp    - pointer to the dirlock for this entry (NULL on error)
- *      direntflags - (case-insensitive lookup only)
- *             flags if multiple case-sensitive matches exist in directory
- *      realpnp     - (case-insensitive lookup only)
- *             actual name matched within the directory
  *
  * Return value: 0 on success or errno on failure.
  *
  * NOTE: Always checks for, and rejects, '.' and '..'.
- * NOTE: For case-insensitive file systems we take wide locks (see below),
- *      but return znode pointers to a single match.
  */
 int
-zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
-    int flag, int *direntflags, pathname_t *realpnp)
+zfs_dirent_lookup(znode_t *dzp, const char *name, znode_t **zpp, int flag)
 {
        zfsvfs_t        *zfsvfs = dzp->z_zfsvfs;
-       zfs_dirlock_t   *dl;
-       boolean_t       update;
        boolean_t       exact;
        uint64_t        zoid;
        vnode_t         *vp = NULL;
        int             error = 0;
-       int             cmpflags;
+
+       ASSERT_VOP_LOCKED(ZTOV(dzp), __func__);
 
        *zpp = NULL;
-       *dlpp = NULL;
 
        /*
         * Verify that we are not trying to lock '.', '..', or '.zfs'
@@ -161,280 +129,93 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, zn
         * Case sensitivity and normalization preferences are set when
         * the file system is created.  These are stored in the
         * zfsvfs->z_case and zfsvfs->z_norm fields.  These choices
-        * affect what vnodes can be cached in the DNLC, how we
-        * perform zap lookups, and the "width" of our dirlocks.
+        * affect how we perform zap lookups.
         *
-        * A normal dirlock locks a single name.  Note that with
-        * normalization a name can be composed multiple ways, but
-        * when normalized, these names all compare equal.  A wide
-        * dirlock locks multiple names.  We need these when the file
-        * system is supporting mixed-mode access.  It is sometimes
-        * necessary to lock all case permutations of file name at
-        * once so that simultaneous case-insensitive/case-sensitive
-        * behaves as rationally as possible.
-        */
-
-       /*
         * Decide if exact matches should be requested when performing
         * a zap lookup on file systems supporting case-insensitive
         * access.
-        */
-       exact =
-           ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE) && (flag & ZCIEXACT)) ||
-           ((zfsvfs->z_case == ZFS_CASE_MIXED) && !(flag & ZCILOOK));
-
-       /*
-        * Only look in or update the DNLC if we are looking for the
-        * name on a file system that does not require normalization
-        * or case folding.  We can also look there if we happen to be
-        * on a non-normalizing, mixed sensitivity file system IF we
-        * are looking for the exact name.
         *
-        * Maybe can add TO-UPPERed version of name to dnlc in ci-only
-        * case for performance improvement?
+        * NB: we do not need to worry about this flag for ZFS_CASE_SENSITIVE
+        * because in that case MT_EXACT and MT_FIRST should produce exactly
+        * the same result.
         */
-       update = !zfsvfs->z_norm ||
-           ((zfsvfs->z_case == ZFS_CASE_MIXED) &&
-           !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER) && !(flag & ZCILOOK));
+       exact = zfsvfs->z_case == ZFS_CASE_MIXED;
 
-       /*
-        * ZRENAMING indicates we are in a situation where we should
-        * take narrow locks regardless of the file system's
-        * preferences for normalizing and case folding.  This will
-        * prevent us deadlocking trying to grab the same wide lock
-        * twice if the two names happen to be case-insensitive
-        * matches.
-        */
-       if (flag & ZRENAMING)
-               cmpflags = 0;
-       else
-               cmpflags = zfsvfs->z_norm;
-
-       /*
-        * Wait until there are no locks on this name.
-        *
-        * Don't grab the the lock if it is already held. However, cannot
-        * have both ZSHARED and ZHAVELOCK together.
-        */
-       ASSERT(!(flag & ZSHARED) || !(flag & ZHAVELOCK));
-       if (!(flag & ZHAVELOCK))
-               rw_enter(&dzp->z_name_lock, RW_READER);
-
-       mutex_enter(&dzp->z_lock);
-       for (;;) {
-               if (dzp->z_unlinked && !(flag & ZXATTR)) {
-                       mutex_exit(&dzp->z_lock);
-                       if (!(flag & ZHAVELOCK))
-                               rw_exit(&dzp->z_name_lock);
-                       return (SET_ERROR(ENOENT));
-               }
-               for (dl = dzp->z_dirlocks; dl != NULL; dl = dl->dl_next) {
-                       if ((u8_strcmp(name, dl->dl_name, 0, cmpflags,
-                           U8_UNICODE_LATEST, &error) == 0) || error != 0)
-                               break;
-               }
-               if (error != 0) {
-                       mutex_exit(&dzp->z_lock);
-                       if (!(flag & ZHAVELOCK))
-                               rw_exit(&dzp->z_name_lock);
-                       return (SET_ERROR(ENOENT));
-               }
-               if (dl == NULL) {
-                       size_t namesize;
-
-                       /*
-                        * Allocate a new dirlock and add it to the list.
-                        */
-                       namesize = strlen(name) + 1;
-                       dl = kmem_alloc(sizeof (zfs_dirlock_t) + namesize,
-                           KM_SLEEP);
-                       cv_init(&dl->dl_cv, NULL, CV_DEFAULT, NULL);
-                       dl->dl_name = (char *)(dl + 1);
-                       bcopy(name, dl->dl_name, namesize);
-                       dl->dl_sharecnt = 0;
-                       dl->dl_namelock = 0;
-                       dl->dl_namesize = namesize;
-                       dl->dl_dzp = dzp;
-                       dl->dl_next = dzp->z_dirlocks;
-                       dzp->z_dirlocks = dl;
-                       break;
-               }
-               if ((flag & ZSHARED) && dl->dl_sharecnt != 0)
-                       break;
-               cv_wait(&dl->dl_cv, &dzp->z_lock);
-       }
-
-       /*
-        * If the z_name_lock was NOT held for this dirlock record it.
-        */
-       if (flag & ZHAVELOCK)
-               dl->dl_namelock = 1;
-
-       if (flag & ZSHARED)
-               dl->dl_sharecnt++;
-
-       mutex_exit(&dzp->z_lock);
-
-       /*
-        * We have a dirlock on the name.  (Note that it is the dirlock,
-        * not the dzp's z_lock, that protects the name in the zap object.)
-        * See if there's an object by this name; if so, put a hold on it.
-        */
+       if (dzp->z_unlinked && !(flag & ZXATTR))
+               return (ENOENT);
        if (flag & ZXATTR) {
                error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &zoid,
                    sizeof (zoid));
                if (error == 0)
                        error = (zoid == 0 ? ENOENT : 0);
        } else {
-               if (update)
-                       vp = dnlc_lookup(ZTOV(dzp), name);
-               if (vp == DNLC_NO_VNODE) {
-                       VN_RELE(vp);
-                       error = SET_ERROR(ENOENT);
-               } else if (vp) {
-                       if (flag & ZNEW) {
-                               zfs_dirent_unlock(dl);
-                               VN_RELE(vp);
-                               return (SET_ERROR(EEXIST));
-                       }
-                       *dlpp = dl;
-                       *zpp = VTOZ(vp);
-                       return (0);
-               } else {
-                       error = zfs_match_find(zfsvfs, dzp, name, exact,
-                           update, direntflags, realpnp, &zoid);
-               }
+               error = zfs_match_find(zfsvfs, dzp, name, exact, &zoid);
        }
        if (error) {
                if (error != ENOENT || (flag & ZEXISTS)) {
-                       zfs_dirent_unlock(dl);
                        return (error);
                }
        } else {
                if (flag & ZNEW) {
-                       zfs_dirent_unlock(dl);
                        return (SET_ERROR(EEXIST));
                }
                error = zfs_zget(zfsvfs, zoid, zpp);
-               if (error) {
-                       zfs_dirent_unlock(dl);
+               if (error)
                        return (error);
-               }
-               if (!(flag & ZXATTR) && update)
-                       dnlc_update(ZTOV(dzp), name, ZTOV(*zpp));
+               ASSERT(!(*zpp)->z_unlinked);
        }
 
-       *dlpp = dl;
-
        return (0);
 }
 
-/*
- * Unlock this directory entry and wake anyone who was waiting for it.
- */
-void
-zfs_dirent_unlock(zfs_dirlock_t *dl)
+static int
+zfs_dd_lookup(znode_t *dzp, znode_t **zpp)
 {
-       znode_t *dzp = dl->dl_dzp;
-       zfs_dirlock_t **prev_dl, *cur_dl;
+       zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
+       znode_t *zp;
+       uint64_t parent;
+       int error;
 
-       mutex_enter(&dzp->z_lock);
+       ASSERT_VOP_LOCKED(ZTOV(dzp), __func__);
+       ASSERT(RRM_READ_HELD(&zfsvfs->z_teardown_lock));
 
-       if (!dl->dl_namelock)
-               rw_exit(&dzp->z_name_lock);
+       if (dzp->z_unlinked)
+               return (ENOENT);
 
-       if (dl->dl_sharecnt > 1) {
-               dl->dl_sharecnt--;
-               mutex_exit(&dzp->z_lock);
-               return;
-       }
-       prev_dl = &dzp->z_dirlocks;
-       while ((cur_dl = *prev_dl) != dl)
-               prev_dl = &cur_dl->dl_next;
-       *prev_dl = dl->dl_next;
-       cv_broadcast(&dl->dl_cv);
-       mutex_exit(&dzp->z_lock);
+       if ((error = sa_lookup(dzp->z_sa_hdl,
+           SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
+               return (error);
 
-       cv_destroy(&dl->dl_cv);
-       kmem_free(dl, sizeof (*dl) + dl->dl_namesize);
+       error = zfs_zget(zfsvfs, parent, &zp);
+       if (error == 0)
+               *zpp = zp;
+       return (error);
 }
 
-/*
- * Look up an entry in a directory.
- *
- * NOTE: '.' and '..' are handled as special cases because
- *     no directory entries are actually stored for them.  If this is
- *     the root of a filesystem, then '.zfs' is also treated as a
- *     special pseudo-directory.
- */
 int
-zfs_dirlook(znode_t *dzp, char *name, vnode_t **vpp, int flags,
-    int *deflg, pathname_t *rpnp)
+zfs_dirlook(znode_t *dzp, const char *name, znode_t **zpp)
 {
-       zfs_dirlock_t *dl;
+       zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
        znode_t *zp;
        int error = 0;
-       uint64_t parent;
-       int unlinked;
-
-       if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
-               mutex_enter(&dzp->z_lock);
-               unlinked = dzp->z_unlinked;
-               mutex_exit(&dzp->z_lock);
-               if (unlinked)
-                       return (ENOENT);
 
-               *vpp = ZTOV(dzp);
-               VN_HOLD(*vpp);
-       } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
-               zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
+       ASSERT_VOP_LOCKED(ZTOV(dzp), __func__);
+       ASSERT(RRM_READ_HELD(&zfsvfs->z_teardown_lock));
 
-               /*
-                * If we are a snapshot mounted under .zfs, return
-                * the vp for the snapshot directory.
-                */
-               if ((error = sa_lookup(dzp->z_sa_hdl,
-                   SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
-                       return (error);
-               if (parent == dzp->z_id && zfsvfs->z_parent != zfsvfs) {
-                       error = zfsctl_root_lookup(zfsvfs->z_parent->z_ctldir,
-                           "snapshot", vpp, NULL, 0, NULL, kcred,
-                           NULL, NULL, NULL);
-                       return (error);
-               }
-
-               mutex_enter(&dzp->z_lock);
-               unlinked = dzp->z_unlinked;
-               mutex_exit(&dzp->z_lock);
-               if (unlinked)
-                       return (ENOENT);
+       if (dzp->z_unlinked)
+               return (SET_ERROR(ENOENT));
 
-               rw_enter(&dzp->z_parent_lock, RW_READER);
-               error = zfs_zget(zfsvfs, parent, &zp);
-               if (error == 0)
-                       *vpp = ZTOV(zp);
-               rw_exit(&dzp->z_parent_lock);
-       } else if (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) {
-               *vpp = zfsctl_root(dzp);
+       if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
+               *zpp = dzp;
+       } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
+               error = zfs_dd_lookup(dzp, zpp);
        } else {
-               int zf;
-
-               zf = ZEXISTS | ZSHARED;
-               if (flags & FIGNORECASE)
-                       zf |= ZCILOOK;
-
-               error = zfs_dirent_lock(&dl, dzp, name, &zp, zf, deflg, rpnp);
+               error = zfs_dirent_lookup(dzp, name, &zp, ZEXISTS);
                if (error == 0) {
-                       *vpp = ZTOV(zp);
-                       zfs_dirent_unlock(dl);
                        dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */
+                       *zpp = zp;
                }
-               rpnp = NULL;
        }
-
-       if ((flags & FIGNORECASE) && rpnp && !error)
-               (void) strlcpy(rpnp->pn_buf, name, rpnp->pn_bufsize);
-
        return (error);
 }
 
@@ -510,8 +291,9 @@ zfs_unlinked_drain(zfsvfs_t *zfsvfs)
                if (error != 0)
                        continue;
 
+               vn_lock(ZTOV(zp), LK_EXCLUSIVE | LK_RETRY);
                zp->z_unlinked = B_TRUE;
-               VN_RELE(ZTOV(zp));
+               vput(ZTOV(zp));
        }
        zap_cursor_fini(&zc);
 }
@@ -535,7 +317,6 @@ zfs_purgedir(znode_t *dzp)
        znode_t         *xzp;
        dmu_tx_t        *tx;
        zfsvfs_t        *zfsvfs = dzp->z_zfsvfs;
-       zfs_dirlock_t   dl;
        int skipped = 0;
        int error;
 
@@ -549,6 +330,7 @@ zfs_purgedir(znode_t *dzp)
                        continue;
                }
 
+               vn_lock(ZTOV(xzp), LK_EXCLUSIVE | LK_RETRY);
                ASSERT((ZTOV(xzp)->v_type == VREG) ||
                    (ZTOV(xzp)->v_type == VLNK));
 
@@ -563,20 +345,17 @@ zfs_purgedir(znode_t *dzp)
                error = dmu_tx_assign(tx, TXG_WAIT);
                if (error) {
                        dmu_tx_abort(tx);
-                       VN_RELE(ZTOV(xzp));
+                       vput(ZTOV(xzp));
                        skipped += 1;
                        continue;
                }
-               bzero(&dl, sizeof (dl));
-               dl.dl_dzp = dzp;
-               dl.dl_name = zap.za_name;
 
-               error = zfs_link_destroy(&dl, xzp, tx, 0, NULL);
+               error = zfs_link_destroy(dzp, zap.za_name, xzp, tx, 0, NULL);
                if (error)
                        skipped += 1;
                dmu_tx_commit(tx);
 
-               VN_RELE(ZTOV(xzp));
+               vput(ZTOV(xzp));
        }
        zap_cursor_fini(&zc);
        if (error != ENOENT)
@@ -596,6 +375,7 @@ zfs_rmnode(znode_t *zp)
        int             error;
 
        ASSERT(zp->z_links == 0);
+       ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
 
        /*
         * If this is an attribute directory, purge its contents.
@@ -640,7 +420,8 @@ zfs_rmnode(znode_t *zp)
            &xattr_obj, sizeof (xattr_obj));
        if (error == 0 && xattr_obj) {
                error = zfs_zget(zfsvfs, xattr_obj, &xzp);
-               ASSERT(error == 0);
+               ASSERT3S(error, ==, 0);
+               vn_lock(ZTOV(xzp), LK_EXCLUSIVE | LK_RETRY);
        }
 
        acl_obj = zfs_external_acl(zp);
@@ -674,12 +455,10 @@ zfs_rmnode(znode_t *zp)
 
        if (xzp) {
                ASSERT(error == 0);
-               mutex_enter(&xzp->z_lock);
                xzp->z_unlinked = B_TRUE;       /* mark xzp for deletion */
                xzp->z_links = 0;       /* no more links to it */
                VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
                    &xzp->z_links, sizeof (xzp->z_links), tx));
-               mutex_exit(&xzp->z_lock);
                zfs_unlinked_add(xzp, tx);
        }
 
@@ -692,7 +471,7 @@ zfs_rmnode(znode_t *zp)
        dmu_tx_commit(tx);
 out:
        if (xzp)
-               VN_RELE(ZTOV(xzp));
+               vput(ZTOV(xzp));
 }
 
 static uint64_t
@@ -706,12 +485,12 @@ zfs_dirent(znode_t *zp, uint64_t mode)
 }
 
 /*
- * Link zp into dl.  Can only fail if zp has been unlinked.
+ * Link zp into dzp.  Can only fail if zp has been unlinked.
  */
 int
-zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
+zfs_link_create(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx,
+    int flag)
 {
-       znode_t *dzp = dl->dl_dzp;
        zfsvfs_t *zfsvfs = zp->z_zfsvfs;
        vnode_t *vp = ZTOV(zp);
        uint64_t value;
@@ -721,18 +500,32 @@ zfs_link_create(zfs_dirlock_t *dl, znode
        int count = 0;
        int error;
 
-       mutex_enter(&zp->z_lock);
-
+       ASSERT_VOP_ELOCKED(ZTOV(dzp), __func__);
+       ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
+#if 0
+       if (zp_is_dir) {
+               error = 0;
+               if (dzp->z_links >= LINK_MAX)
+                       error = SET_ERROR(EMLINK);
+               return (error);
+       }
+#endif
        if (!(flag & ZRENAMING)) {
                if (zp->z_unlinked) {   /* no new links to unlinked zp */
                        ASSERT(!(flag & (ZNEW | ZEXISTS)));
-                       mutex_exit(&zp->z_lock);
                        return (SET_ERROR(ENOENT));
                }
+#if 0
+               if (zp->z_links >= LINK_MAX) {
+                       return (SET_ERROR(EMLINK));
+               }
+#endif
                zp->z_links++;
                SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
                    &zp->z_links, sizeof (zp->z_links));
 
+       } else {
+               ASSERT(zp->z_unlinked == 0);
        }
        SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL,
            &dzp->z_id, sizeof (dzp->z_id));
@@ -746,11 +539,8 @@ zfs_link_create(zfs_dirlock_t *dl, znode
                    ctime, B_TRUE);
        }
        error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
-       ASSERT(error == 0);
-
-       mutex_exit(&zp->z_lock);
+       ASSERT0(error);
 
-       mutex_enter(&dzp->z_lock);
        dzp->z_size++;
        dzp->z_links += zp_is_dir;
        count = 0;
@@ -766,55 +556,48 @@ zfs_link_create(zfs_dirlock_t *dl, znode
            &dzp->z_pflags, sizeof (dzp->z_pflags));
        zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
        error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
-       ASSERT(error == 0);
-       mutex_exit(&dzp->z_lock);
+       ASSERT0(error);
 
        value = zfs_dirent(zp, zp->z_mode);
-       error = zap_add(zp->z_zfsvfs->z_os, dzp->z_id, dl->dl_name,
+       error = zap_add(zp->z_zfsvfs->z_os, dzp->z_id, name,
            8, 1, &value, tx);
-       ASSERT(error == 0);
-
-       dnlc_update(ZTOV(dzp), dl->dl_name, vp);
+       VERIFY0(error);
 
        return (0);
 }
 
 static int
-zfs_dropname(zfs_dirlock_t *dl, znode_t *zp, znode_t *dzp, dmu_tx_t *tx,
+zfs_dropname(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx,
     int flag)
 {
        int error;
 
        if (zp->z_zfsvfs->z_norm) {
-               if (((zp->z_zfsvfs->z_case == ZFS_CASE_INSENSITIVE) &&
-                   (flag & ZCIEXACT)) ||
-                   ((zp->z_zfsvfs->z_case == ZFS_CASE_MIXED) &&
-                   !(flag & ZCILOOK)))
+               if (zp->z_zfsvfs->z_case == ZFS_CASE_MIXED)
                        error = zap_remove_norm(zp->z_zfsvfs->z_os,
-                           dzp->z_id, dl->dl_name, MT_EXACT, tx);
+                           dzp->z_id, name, MT_EXACT, tx);
                else
                        error = zap_remove_norm(zp->z_zfsvfs->z_os,
-                           dzp->z_id, dl->dl_name, MT_FIRST, tx);
+                           dzp->z_id, name, MT_FIRST, tx);
        } else {
                error = zap_remove(zp->z_zfsvfs->z_os,
-                   dzp->z_id, dl->dl_name, tx);
+                   dzp->z_id, name, tx);
        }
 
        return (error);
 }
 
 /*
- * Unlink zp from dl, and mark zp for deletion if this was the last link.
+ * Unlink zp from dzp, and mark zp for deletion if this was the last link.
  * Can fail if zp is a mount point (EBUSY) or a non-empty directory (EEXIST).
  * If 'unlinkedp' is NULL, we put unlinked znodes on the unlinked list.
  * If it's non-NULL, we use it to indicate whether the znode needs deletion,
  * and it's the caller's job to do it.
  */
 int
-zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag,
-    boolean_t *unlinkedp)
+zfs_link_destroy(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx,
+    int flag, boolean_t *unlinkedp)
 {
-       znode_t *dzp = dl->dl_dzp;
        zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
        vnode_t *vp = ZTOV(zp);
        int zp_is_dir = (vp->v_type == VDIR);
@@ -824,22 +607,12 @@ zfs_link_destroy(zfs_dirlock_t *dl, znod
        int count = 0;
        int error;
 
-       dnlc_remove(ZTOV(dzp), dl->dl_name);
+       ASSERT_VOP_ELOCKED(ZTOV(dzp), __func__);
+       ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
 
        if (!(flag & ZRENAMING)) {
-               if (vn_vfswlock(vp))            /* prevent new mounts on zp */
-                       return (SET_ERROR(EBUSY));
-
-               if (vn_ismntpt(vp)) {           /* don't remove mount point */
-                       vn_vfsunlock(vp);
-                       return (SET_ERROR(EBUSY));
-               }
-
-               mutex_enter(&zp->z_lock);
 
                if (zp_is_dir && !zfs_dirempty(zp)) {
-                       mutex_exit(&zp->z_lock);
-                       vn_vfsunlock(vp);
 #ifdef illumos
                        return (SET_ERROR(EEXIST));
 #else
@@ -852,10 +625,8 @@ zfs_link_destroy(zfs_dirlock_t *dl, znod
                 * First try removing the name from the directory; if that
                 * fails, return the error.
                 */
-               error = zfs_dropname(dl, zp, dzp, tx, flag);
+               error = zfs_dropname(dzp, name, zp, tx, flag);
                if (error != 0) {
-                       mutex_exit(&zp->z_lock);
-                       vn_vfsunlock(vp);
                        return (error);
                }
 
@@ -882,16 +653,14 @@ zfs_link_destroy(zfs_dirlock_t *dl, znod
                    NULL, &zp->z_links, sizeof (zp->z_links));
                error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
                count = 0;
-               ASSERT(error == 0);
-               mutex_exit(&zp->z_lock);
-               vn_vfsunlock(vp);
+               ASSERT0(error);
        } else {
-               error = zfs_dropname(dl, zp, dzp, tx, flag);
+               ASSERT(zp->z_unlinked == 0);
+               error = zfs_dropname(dzp, name, zp, tx, flag);
                if (error != 0)
                        return (error);
        }
 
-       mutex_enter(&dzp->z_lock);
        dzp->z_size--;          /* one dirent removed */
        dzp->z_links -= zp_is_dir;      /* ".." link from zp */
        SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
@@ -906,8 +675,7 @@ zfs_link_destroy(zfs_dirlock_t *dl, znod
            NULL, &dzp->z_pflags, sizeof (dzp->z_pflags));
        zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
        error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
-       ASSERT(error == 0);
-       mutex_exit(&dzp->z_lock);
+       ASSERT0(error);
 
        if (unlinkedp != NULL)
                *unlinkedp = unlinked;
@@ -918,14 +686,12 @@ zfs_link_destroy(zfs_dirlock_t *dl, znod
 }
 
 /*
- * Indicate whether the directory is empty.  Works with or without z_lock
- * held, but can only be consider a hint in the latter case.  Returns true
- * if only "." and ".." remain and there's no work in progress.
+ * Indicate whether the directory is empty.
  */
 boolean_t
 zfs_dirempty(znode_t *dzp)
 {
-       return (dzp->z_size == 2 && dzp->z_dirlocks == 0);
+       return (dzp->z_size == 2);
 }
 
 int
@@ -1019,23 +785,20 @@ zfs_get_xattrdir(znode_t *zp, vnode_t **
 {
        zfsvfs_t        *zfsvfs = zp->z_zfsvfs;
        znode_t         *xzp;
-       zfs_dirlock_t   *dl;
        vattr_t         va;
        int             error;
 top:
-       error = zfs_dirent_lock(&dl, zp, "", &xzp, ZXATTR, NULL, NULL);
+       error = zfs_dirent_lookup(zp, "", &xzp, ZXATTR);
        if (error)
                return (error);
 
        if (xzp != NULL) {
                *xvpp = ZTOV(xzp);
-               zfs_dirent_unlock(dl);
                return (0);
        }
 
 
        if (!(flags & CREATE_XATTR_DIR)) {
-               zfs_dirent_unlock(dl);
 #ifdef illumos
                return (SET_ERROR(ENOENT));
 #else
@@ -1044,7 +807,6 @@ top:
        }
 
        if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
-               zfs_dirent_unlock(dl);
                return (SET_ERROR(EROFS));

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"

Reply via email to