Author: avg
Date: Wed Oct 16 09:04:53 2019
New Revision: 353634
URL: https://svnweb.freebsd.org/changeset/base/353634

Log:
  MFV r348596: 9689 zfs range lock code should not be zpl-specific
  
  illumos/illumos-gate@7931524763ef94dc16989451dddd206563d03bb4
  
  FreeBSD note: some tweaking was needed to avoid a conflict with
  sys/rangelock.h.
  
  Author:       Matthew Ahrens <mahrens@delphix.com>
  Obtained from:        illumos
  MFC after:    3 weeks

Modified:
  head/cddl/contrib/opensolaris/cmd/ztest/ztest.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_rlock.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_rlock.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c
Directory Properties:
  head/cddl/contrib/opensolaris/   (props changed)
  head/sys/cddl/contrib/opensolaris/   (props changed)

Modified: head/cddl/contrib/opensolaris/cmd/ztest/ztest.c
==============================================================================
--- head/cddl/contrib/opensolaris/cmd/ztest/ztest.c     Wed Oct 16 08:56:07 
2019        (r353633)
+++ head/cddl/contrib/opensolaris/cmd/ztest/ztest.c     Wed Oct 16 09:04:53 
2019        (r353634)
@@ -242,7 +242,9 @@ typedef struct bufwad {
 } bufwad_t;
 
 /*
- * XXX -- fix zfs range locks to be generic so we can use them here.
+ * It would be better to use a rangelock_t per object.  Unfortunately
+ * the rangelock_t is not a drop-in replacement for rl_t, because we
+ * still need to map from object ID to rangelock_t.
  */
 typedef enum {
        RL_READER,
@@ -1943,12 +1945,12 @@ static void
 ztest_get_done(zgd_t *zgd, int error)
 {
        ztest_ds_t *zd = zgd->zgd_private;
-       uint64_t object = zgd->zgd_rl->rl_object;
+       uint64_t object = ((rl_t *)zgd->zgd_lr)->rl_object;
 
        if (zgd->zgd_db)
                dmu_buf_rele(zgd->zgd_db, zgd);
 
-       ztest_range_unlock(zgd->zgd_rl);
+       ztest_range_unlock((rl_t *)zgd->zgd_lr);
        ztest_object_unlock(zd, object);
 
        umem_free(zgd, sizeof (*zgd));
@@ -1998,8 +2000,8 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, s
        zgd->zgd_private = zd;
 
        if (buf != NULL) {      /* immediate write */
-               zgd->zgd_rl = ztest_range_lock(zd, object, offset, size,
-                   RL_READER);
+               zgd->zgd_lr = (struct locked_range *)ztest_range_lock(zd,
+                   object, offset, size, RL_READER);
 
                error = dmu_read(os, object, offset, size, buf,
                    DMU_READ_NO_PREFETCH);
@@ -2013,8 +2015,8 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, s
                        offset = 0;
                }
 
-               zgd->zgd_rl = ztest_range_lock(zd, object, offset, size,
-                   RL_READER);
+               zgd->zgd_lr = (struct locked_range *)ztest_range_lock(zd,
+                   object, offset, size, RL_READER);
 
                error = dmu_buf_hold(os, object, offset, zgd, &db,
                    DMU_READ_NO_PREFETCH);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h       Wed Oct 
16 08:56:07 2019        (r353633)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h       Wed Oct 
16 09:04:53 2019        (r353634)
@@ -75,6 +75,7 @@ struct arc_buf;
 struct zio_prop;
 struct sa_handle;
 struct file;
+struct locked_range;
 
 typedef struct objset objset_t;
 typedef struct dmu_tx dmu_tx_t;
@@ -966,7 +967,7 @@ typedef struct zgd {
        struct lwb      *zgd_lwb;
        struct blkptr   *zgd_bp;
        dmu_buf_t       *zgd_db;
-       struct rl       *zgd_rl;
+       struct locked_range *zgd_lr;
        void            *zgd_private;
 } zgd_t;
 

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_rlock.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_rlock.h Wed Oct 
16 08:56:07 2019        (r353633)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_rlock.h Wed Oct 
16 09:04:53 2019        (r353634)
@@ -22,6 +22,9 @@
  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
+/*
+ * Copyright (c) 2018 by Delphix. All rights reserved.
+ */
 
 #ifndef        _SYS_FS_ZFS_RLOCK_H
 #define        _SYS_FS_ZFS_RLOCK_H
@@ -30,54 +33,53 @@
 extern "C" {
 #endif
 
-#ifdef _KERNEL
+#ifdef __FreeBSD__
+#define        rangelock_init          zfs_rangelock_init
+#define        rangelock_fini          zfs_rangelock_fini
+#endif
 
-#include <sys/zfs_znode.h>
-
 typedef enum {
        RL_READER,
        RL_WRITER,
        RL_APPEND
-} rl_type_t;
+} rangelock_type_t;
 
-typedef struct rl {
-       znode_t *r_zp;          /* znode this lock applies to */
-       avl_node_t r_node;      /* avl node link */
-       uint64_t r_off;         /* file range offset */
-       uint64_t r_len;         /* file range length */
-       uint_t r_cnt;           /* range reference count in tree */
-       rl_type_t r_type;       /* range type */
-       kcondvar_t r_wr_cv;     /* cv for waiting writers */
-       kcondvar_t r_rd_cv;     /* cv for waiting readers */
-       uint8_t r_proxy;        /* acting for original range */
-       uint8_t r_write_wanted; /* writer wants to lock this range */
-       uint8_t r_read_wanted;  /* reader wants to lock this range */
-} rl_t;
+struct locked_range;
 
-/*
- * Lock a range (offset, length) as either shared (RL_READER)
- * or exclusive (RL_WRITER or RL_APPEND).  RL_APPEND is a special type that
- * is converted to RL_WRITER that specified to lock from the start of the
- * end of file.  Returns the range lock structure.
- */
-rl_t *zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type);
+typedef void (rangelock_cb_t)(struct locked_range *, void *);
 
-/* Unlock range and destroy range lock structure. */
-void zfs_range_unlock(rl_t *rl);
+#ifdef __FreeBSD__
+typedef struct zfs_rangelock {
+#else
+typedef struct rangelock {
+#endif
+       avl_tree_t rl_tree; /* contains locked_range_t */
+       kmutex_t rl_lock;
+       rangelock_cb_t *rl_cb;
+       void *rl_arg;
+} rangelock_t;
 
-/*
- * Reduce range locked as RW_WRITER from whole file to specified range.
- * Asserts the whole file was previously locked.
- */
-void zfs_range_reduce(rl_t *rl, uint64_t off, uint64_t len);
+typedef struct locked_range {
+       rangelock_t *lr_rangelock; /* rangelock that this lock applies to */
+       avl_node_t lr_node;     /* avl node link */
+       uint64_t lr_offset;     /* file range offset */
+       uint64_t lr_length;     /* file range length */
+       uint_t lr_count;        /* range reference count in tree */
+       rangelock_type_t lr_type; /* range type */
+       kcondvar_t lr_write_cv; /* cv for waiting writers */
+       kcondvar_t lr_read_cv;  /* cv for waiting readers */
+       uint8_t lr_proxy;       /* acting for original range */
+       uint8_t lr_write_wanted; /* writer wants to lock this range */
+       uint8_t lr_read_wanted; /* reader wants to lock this range */
+} locked_range_t;
 
-/*
- * AVL comparison function used to order range locks
- * Locks are ordered on the start offset of the range.
- */
-int zfs_range_compare(const void *arg1, const void *arg2);
+void rangelock_init(rangelock_t *, rangelock_cb_t *, void *);
+void rangelock_fini(rangelock_t *);
 
-#endif /* _KERNEL */
+locked_range_t *rangelock_enter(rangelock_t *,
+    uint64_t, uint64_t, rangelock_type_t);
+void rangelock_exit(locked_range_t *);
+void rangelock_reduce(locked_range_t *, uint64_t, uint64_t);
 
 #ifdef __cplusplus
 }

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h Wed Oct 
16 08:56:07 2019        (r353633)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h Wed Oct 
16 09:04:53 2019        (r353634)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
  * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
  */
@@ -36,6 +36,7 @@
 #include <sys/rrwlock.h>
 #include <sys/zfs_sa.h>
 #include <sys/zfs_stat.h>
+#include <sys/zfs_rlock.h>
 #endif
 #include <sys/zfs_acl.h>
 #include <sys/zil.h>
@@ -57,8 +58,8 @@ extern "C" {
 #define        ZFS_APPENDONLY          0x0000004000000000
 #define        ZFS_NODUMP              0x0000008000000000
 #define        ZFS_OPAQUE              0x0000010000000000
-#define        ZFS_AV_QUARANTINED      0x0000020000000000
-#define        ZFS_AV_MODIFIED         0x0000040000000000
+#define        ZFS_AV_QUARANTINED      0x0000020000000000
+#define        ZFS_AV_MODIFIED         0x0000040000000000
 #define        ZFS_REPARSE             0x0000080000000000
 #define        ZFS_OFFLINE             0x0000100000000000
 #define        ZFS_SPARSE              0x0000200000000000
@@ -78,8 +79,8 @@ extern "C" {
  */
 #define        ZFS_XATTR               0x1             /* is an extended 
attribute */
 #define        ZFS_INHERIT_ACE         0x2             /* ace has inheritable 
ACEs */
-#define        ZFS_ACL_TRIVIAL         0x4             /* files ACL is trivial 
*/
-#define        ZFS_ACL_OBJ_ACE         0x8             /* ACL has CMPLX Object 
ACE */
+#define        ZFS_ACL_TRIVIAL         0x4             /* files ACL is trivial 
*/
+#define        ZFS_ACL_OBJ_ACE         0x8             /* ACL has CMPLX Object 
ACE */
 #define        ZFS_ACL_PROTECTED       0x10            /* ACL protected */
 #define        ZFS_ACL_DEFAULTED       0x20            /* ACL should be 
defaulted */
 #define        ZFS_ACL_AUTO_INHERIT    0x40            /* ACL should be 
inherited */
@@ -177,8 +178,7 @@ typedef struct znode {
        krwlock_t       z_name_lock;    /* "master" lock for dirent locks */
        zfs_dirlock_t   *z_dirlocks;    /* directory entry lock list */
 #endif
-       kmutex_t        z_range_lock;   /* protects changes to z_range_avl */
-       avl_tree_t      z_range_avl;    /* avl tree of file range locks */
+       rangelock_t     z_rangelock;    /* file range locks */
        uint8_t         z_unlinked;     /* file has been unlinked */
        uint8_t         z_atime_dirty;  /* atime needs to be synced */
        uint8_t         z_zn_prefetch;  /* Prefetch znodes? */

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_rlock.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_rlock.c     Wed Oct 
16 08:56:07 2019        (r353633)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_rlock.c     Wed Oct 
16 09:04:53 2019        (r353634)
@@ -23,7 +23,7 @@
  * Use is subject to license terms.
  */
 /*
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
  */
 
 /*
@@ -34,9 +34,9 @@
  * Interface
  * ---------
  * Defined in zfs_rlock.h but essentially:
- *     rl = zfs_range_lock(zp, off, len, lock_type);
- *     zfs_range_unlock(rl);
- *     zfs_range_reduce(rl, off, len);
+ *     lr = rangelock_enter(zp, off, len, lock_type);
+ *     rangelock_reduce(lr, off, len); // optional
+ *     rangelock_exit(lr);
  *
  * AVL tree
  * --------
@@ -46,9 +46,10 @@
  *
  * Common case
  * -----------
- * The (hopefully) usual case is of no overlaps or contention for
- * locks. On entry to zfs_lock_range() a rl_t is allocated; the tree
- * searched that finds no overlap, and *this* rl_t is placed in the tree.
+ * The (hopefully) usual case is of no overlaps or contention for locks. On
+ * entry to rangelock_enter(), a locked_range_t is allocated; the tree
+ * searched that finds no overlap, and *this* locked_range_t is placed in the
+ * tree.
  *
  * Overlaps/Reference counting/Proxy locks
  * ---------------------------------------
@@ -87,67 +88,90 @@
  *
  * Grow block handling
  * -------------------
- * ZFS supports multiple block sizes currently upto 128K. The smallest
+ * ZFS supports multiple block sizes, up to 16MB. The smallest
  * block size is used for the file which is grown as needed. During this
  * growth all other writers and readers must be excluded.
  * So if the block size needs to be grown then the whole file is
  * exclusively locked, then later the caller will reduce the lock
- * range to just the range to be written using zfs_reduce_range.
+ * range to just the range to be written using rangelock_reduce().
  */
 
+#include <sys/zfs_context.h>
+#include <sys/avl.h>
 #include <sys/zfs_rlock.h>
 
 /*
+ * AVL comparison function used to order range locks
+ * Locks are ordered on the start offset of the range.
+ */
+static int
+rangelock_compare(const void *arg1, const void *arg2)
+{
+       const locked_range_t *rl1 = arg1;
+       const locked_range_t *rl2 = arg2;
+
+       if (rl1->lr_offset > rl2->lr_offset)
+               return (1);
+       if (rl1->lr_offset < rl2->lr_offset)
+               return (-1);
+       return (0);
+}
+
+/*
+ * The callback is invoked when acquiring a RL_WRITER or RL_APPEND lock.
+ * It must convert RL_APPEND to RL_WRITER (starting at the end of the file),
+ * and may increase the range that's locked for RL_WRITER.
+ */
+void
+rangelock_init(rangelock_t *rl, rangelock_cb_t *cb, void *arg)
+{
+       mutex_init(&rl->rl_lock, NULL, MUTEX_DEFAULT, NULL);
+       avl_create(&rl->rl_tree, rangelock_compare,
+           sizeof (locked_range_t), offsetof(locked_range_t, lr_node));
+       rl->rl_cb = cb;
+       rl->rl_arg = arg;
+}
+
+void
+rangelock_fini(rangelock_t *rl)
+{
+       mutex_destroy(&rl->rl_lock);
+       avl_destroy(&rl->rl_tree);
+}
+
+/*
  * Check if a write lock can be grabbed, or wait and recheck until available.
  */
 static void
-zfs_range_lock_writer(znode_t *zp, rl_t *new)
+rangelock_enter_writer(rangelock_t *rl, locked_range_t *new)
 {
-       avl_tree_t *tree = &zp->z_range_avl;
-       rl_t *rl;
+       avl_tree_t *tree = &rl->rl_tree;
+       locked_range_t *lr;
        avl_index_t where;
-       uint64_t end_size;
-       uint64_t off = new->r_off;
-       uint64_t len = new->r_len;
+       uint64_t orig_off = new->lr_offset;
+       uint64_t orig_len = new->lr_length;
+       rangelock_type_t orig_type = new->lr_type;
 
        for (;;) {
                /*
-                * Range locking is also used by zvol and uses a
-                * dummied up znode. However, for zvol, we don't need to
-                * append or grow blocksize, and besides we don't have
-                * a "sa" data or z_zfsvfs - so skip that processing.
-                *
-                * Yes, this is ugly, and would be solved by not handling
-                * grow or append in range lock code. If that was done then
-                * we could make the range locking code generically available
-                * to other non-zfs consumers.
+                * Call callback which can modify new->r_off,len,type.
+                * Note, the callback is used by the ZPL to handle appending
+                * and changing blocksizes.  It isn't needed for zvols.
                 */
-               if (zp->z_vnode) { /* caller is ZPL */
-                       /*
-                        * If in append mode pick up the current end of file.
-                        * This is done under z_range_lock to avoid races.
-                        */
-                       if (new->r_type == RL_APPEND)
-                               new->r_off = zp->z_size;
-
-                       /*
-                        * If we need to grow the block size then grab the whole
-                        * file range. This is also done under z_range_lock to
-                        * avoid races.
-                        */
-                       end_size = MAX(zp->z_size, new->r_off + len);
-                       if (end_size > zp->z_blksz && (!ISP2(zp->z_blksz) ||
-                           zp->z_blksz < zp->z_zfsvfs->z_max_blksz)) {
-                               new->r_off = 0;
-                               new->r_len = UINT64_MAX;
-                       }
+               if (rl->rl_cb != NULL) {
+                       rl->rl_cb(new, rl->rl_arg);
                }
 
                /*
+                * If the type was APPEND, the callback must convert it to
+                * WRITER.
+                */
+               ASSERT3U(new->lr_type, ==, RL_WRITER);
+
+               /*
                 * First check for the usual case of no locks
                 */
                if (avl_numnodes(tree) == 0) {
-                       new->r_type = RL_WRITER; /* convert to writer */
                        avl_add(tree, new);
                        return;
                }
@@ -155,31 +179,33 @@ zfs_range_lock_writer(znode_t *zp, rl_t *new)
                /*
                 * Look for any locks in the range.
                 */
-               rl = avl_find(tree, new, &where);
-               if (rl)
+               lr = avl_find(tree, new, &where);
+               if (lr != NULL)
                        goto wait; /* already locked at same offset */
 
-               rl = (rl_t *)avl_nearest(tree, where, AVL_AFTER);
-               if (rl && (rl->r_off < new->r_off + new->r_len))
+               lr = (locked_range_t *)avl_nearest(tree, where, AVL_AFTER);
+               if (lr != NULL &&
+                   lr->lr_offset < new->lr_offset + new->lr_length)
                        goto wait;
 
-               rl = (rl_t *)avl_nearest(tree, where, AVL_BEFORE);
-               if (rl && rl->r_off + rl->r_len > new->r_off)
+               lr = (locked_range_t *)avl_nearest(tree, where, AVL_BEFORE);
+               if (lr != NULL &&
+                   lr->lr_offset + lr->lr_length > new->lr_offset)
                        goto wait;
 
-               new->r_type = RL_WRITER; /* convert possible RL_APPEND */
                avl_insert(tree, new, where);
                return;
 wait:
-               if (!rl->r_write_wanted) {
-                       cv_init(&rl->r_wr_cv, NULL, CV_DEFAULT, NULL);
-                       rl->r_write_wanted = B_TRUE;
+               if (!lr->lr_write_wanted) {
+                       cv_init(&lr->lr_write_cv, NULL, CV_DEFAULT, NULL);
+                       lr->lr_write_wanted = B_TRUE;
                }
-               cv_wait(&rl->r_wr_cv, &zp->z_range_lock);
+               cv_wait(&lr->lr_write_cv, &rl->rl_lock);
 
                /* reset to original */
-               new->r_off = off;
-               new->r_len = len;
+               new->lr_offset = orig_off;
+               new->lr_length = orig_len;
+               new->lr_type = orig_type;
        }
 }
 
@@ -187,29 +213,29 @@ wait:
  * If this is an original (non-proxy) lock then replace it by
  * a proxy and return the proxy.
  */
-static rl_t *
-zfs_range_proxify(avl_tree_t *tree, rl_t *rl)
+static locked_range_t *
+rangelock_proxify(avl_tree_t *tree, locked_range_t *lr)
 {
-       rl_t *proxy;
+       locked_range_t *proxy;
 
-       if (rl->r_proxy)
-               return (rl); /* already a proxy */
+       if (lr->lr_proxy)
+               return (lr); /* already a proxy */
 
-       ASSERT3U(rl->r_cnt, ==, 1);
-       ASSERT(rl->r_write_wanted == B_FALSE);
-       ASSERT(rl->r_read_wanted == B_FALSE);
-       avl_remove(tree, rl);
-       rl->r_cnt = 0;
+       ASSERT3U(lr->lr_count, ==, 1);
+       ASSERT(lr->lr_write_wanted == B_FALSE);
+       ASSERT(lr->lr_read_wanted == B_FALSE);
+       avl_remove(tree, lr);
+       lr->lr_count = 0;
 
        /* create a proxy range lock */
-       proxy = kmem_alloc(sizeof (rl_t), KM_SLEEP);
-       proxy->r_off = rl->r_off;
-       proxy->r_len = rl->r_len;
-       proxy->r_cnt = 1;
-       proxy->r_type = RL_READER;
-       proxy->r_proxy = B_TRUE;
-       proxy->r_write_wanted = B_FALSE;
-       proxy->r_read_wanted = B_FALSE;
+       proxy = kmem_alloc(sizeof (locked_range_t), KM_SLEEP);
+       proxy->lr_offset = lr->lr_offset;
+       proxy->lr_length = lr->lr_length;
+       proxy->lr_count = 1;
+       proxy->lr_type = RL_READER;
+       proxy->lr_proxy = B_TRUE;
+       proxy->lr_write_wanted = B_FALSE;
+       proxy->lr_read_wanted = B_FALSE;
        avl_add(tree, proxy);
 
        return (proxy);
@@ -219,29 +245,27 @@ zfs_range_proxify(avl_tree_t *tree, rl_t *rl)
  * Split the range lock at the supplied offset
  * returning the *front* proxy.
  */
-static rl_t *
-zfs_range_split(avl_tree_t *tree, rl_t *rl, uint64_t off)
+static locked_range_t *
+rangelock_split(avl_tree_t *tree, locked_range_t *lr, uint64_t off)
 {
-       rl_t *front, *rear;
+       ASSERT3U(lr->lr_length, >, 1);
+       ASSERT3U(off, >, lr->lr_offset);
+       ASSERT3U(off, <, lr->lr_offset + lr->lr_length);
+       ASSERT(lr->lr_write_wanted == B_FALSE);
+       ASSERT(lr->lr_read_wanted == B_FALSE);
 
-       ASSERT3U(rl->r_len, >, 1);
-       ASSERT3U(off, >, rl->r_off);
-       ASSERT3U(off, <, rl->r_off + rl->r_len);
-       ASSERT(rl->r_write_wanted == B_FALSE);
-       ASSERT(rl->r_read_wanted == B_FALSE);
-
        /* create the rear proxy range lock */
-       rear = kmem_alloc(sizeof (rl_t), KM_SLEEP);
-       rear->r_off = off;
-       rear->r_len = rl->r_off + rl->r_len - off;
-       rear->r_cnt = rl->r_cnt;
-       rear->r_type = RL_READER;
-       rear->r_proxy = B_TRUE;
-       rear->r_write_wanted = B_FALSE;
-       rear->r_read_wanted = B_FALSE;
+       locked_range_t *rear = kmem_alloc(sizeof (locked_range_t), KM_SLEEP);
+       rear->lr_offset = off;
+       rear->lr_length = lr->lr_offset + lr->lr_length - off;
+       rear->lr_count = lr->lr_count;
+       rear->lr_type = RL_READER;
+       rear->lr_proxy = B_TRUE;
+       rear->lr_write_wanted = B_FALSE;
+       rear->lr_read_wanted = B_FALSE;
 
-       front = zfs_range_proxify(tree, rl);
-       front->r_len = off - rl->r_off;
+       locked_range_t *front = rangelock_proxify(tree, lr);
+       front->lr_length = off - lr->lr_offset;
 
        avl_insert_here(tree, rear, front, AVL_AFTER);
        return (front);
@@ -251,28 +275,27 @@ zfs_range_split(avl_tree_t *tree, rl_t *rl, uint64_t o
  * Create and add a new proxy range lock for the supplied range.
  */
 static void
-zfs_range_new_proxy(avl_tree_t *tree, uint64_t off, uint64_t len)
+rangelock_new_proxy(avl_tree_t *tree, uint64_t off, uint64_t len)
 {
-       rl_t *rl;
-
-       ASSERT(len);
-       rl = kmem_alloc(sizeof (rl_t), KM_SLEEP);
-       rl->r_off = off;
-       rl->r_len = len;
-       rl->r_cnt = 1;
-       rl->r_type = RL_READER;
-       rl->r_proxy = B_TRUE;
-       rl->r_write_wanted = B_FALSE;
-       rl->r_read_wanted = B_FALSE;
-       avl_add(tree, rl);
+       ASSERT(len != 0);
+       locked_range_t *lr = kmem_alloc(sizeof (locked_range_t), KM_SLEEP);
+       lr->lr_offset = off;
+       lr->lr_length = len;
+       lr->lr_count = 1;
+       lr->lr_type = RL_READER;
+       lr->lr_proxy = B_TRUE;
+       lr->lr_write_wanted = B_FALSE;
+       lr->lr_read_wanted = B_FALSE;
+       avl_add(tree, lr);
 }
 
 static void
-zfs_range_add_reader(avl_tree_t *tree, rl_t *new, rl_t *prev, avl_index_t 
where)
+rangelock_add_reader(avl_tree_t *tree, locked_range_t *new,
+    locked_range_t *prev, avl_index_t where)
 {
-       rl_t *next;
-       uint64_t off = new->r_off;
-       uint64_t len = new->r_len;
+       locked_range_t *next;
+       uint64_t off = new->lr_offset;
+       uint64_t len = new->lr_length;
 
        /*
         * prev arrives either:
@@ -281,37 +304,37 @@ zfs_range_add_reader(avl_tree_t *tree, rl_t *new, rl_t
         *   range may overlap with the new range
         * - null, if there were no ranges starting before the new one
         */
-       if (prev) {
-               if (prev->r_off + prev->r_len <= off) {
+       if (prev != NULL) {
+               if (prev->lr_offset + prev->lr_length <= off) {
                        prev = NULL;
-               } else if (prev->r_off != off) {
+               } else if (prev->lr_offset != off) {
                        /*
                         * convert to proxy if needed then
                         * split this entry and bump ref count
                         */
-                       prev = zfs_range_split(tree, prev, off);
+                       prev = rangelock_split(tree, prev, off);
                        prev = AVL_NEXT(tree, prev); /* move to rear range */
                }
        }
-       ASSERT((prev == NULL) || (prev->r_off == off));
+       ASSERT((prev == NULL) || (prev->lr_offset == off));
 
-       if (prev)
+       if (prev != NULL)
                next = prev;
        else
-               next = (rl_t *)avl_nearest(tree, where, AVL_AFTER);
+               next = avl_nearest(tree, where, AVL_AFTER);
 
-       if (next == NULL || off + len <= next->r_off) {
+       if (next == NULL || off + len <= next->lr_offset) {
                /* no overlaps, use the original new rl_t in the tree */
                avl_insert(tree, new, where);
                return;
        }
 
-       if (off < next->r_off) {
+       if (off < next->lr_offset) {
                /* Add a proxy for initial range before the overlap */
-               zfs_range_new_proxy(tree, off, next->r_off - off);
+               rangelock_new_proxy(tree, off, next->lr_offset - off);
        }
 
-       new->r_cnt = 0; /* will use proxies in tree */
+       new->lr_count = 0; /* will use proxies in tree */
        /*
         * We now search forward through the ranges, until we go past the end
         * of the new range. For each entry we make it a proxy if it
@@ -319,47 +342,51 @@ zfs_range_add_reader(avl_tree_t *tree, rl_t *new, rl_t
         * gaps between the ranges then we create a new proxy range.
         */
        for (prev = NULL; next; prev = next, next = AVL_NEXT(tree, next)) {
-               if (off + len <= next->r_off)
+               if (off + len <= next->lr_offset)
                        break;
-               if (prev && prev->r_off + prev->r_len < next->r_off) {
+               if (prev != NULL && prev->lr_offset + prev->lr_length <
+                   next->lr_offset) {
                        /* there's a gap */
-                       ASSERT3U(next->r_off, >, prev->r_off + prev->r_len);
-                       zfs_range_new_proxy(tree, prev->r_off + prev->r_len,
-                           next->r_off - (prev->r_off + prev->r_len));
+                       ASSERT3U(next->lr_offset, >,
+                           prev->lr_offset + prev->lr_length);
+                       rangelock_new_proxy(tree,
+                           prev->lr_offset + prev->lr_length,
+                           next->lr_offset -
+                           (prev->lr_offset + prev->lr_length));
                }
-               if (off + len == next->r_off + next->r_len) {
+               if (off + len == next->lr_offset + next->lr_length) {
                        /* exact overlap with end */
-                       next = zfs_range_proxify(tree, next);
-                       next->r_cnt++;
+                       next = rangelock_proxify(tree, next);
+                       next->lr_count++;
                        return;
                }
-               if (off + len < next->r_off + next->r_len) {
+               if (off + len < next->lr_offset + next->lr_length) {
                        /* new range ends in the middle of this block */
-                       next = zfs_range_split(tree, next, off + len);
-                       next->r_cnt++;
+                       next = rangelock_split(tree, next, off + len);
+                       next->lr_count++;
                        return;
                }
-               ASSERT3U(off + len, >, next->r_off + next->r_len);
-               next = zfs_range_proxify(tree, next);
-               next->r_cnt++;
+               ASSERT3U(off + len, >, next->lr_offset + next->lr_length);
+               next = rangelock_proxify(tree, next);
+               next->lr_count++;
        }
 
        /* Add the remaining end range. */
-       zfs_range_new_proxy(tree, prev->r_off + prev->r_len,
-           (off + len) - (prev->r_off + prev->r_len));
+       rangelock_new_proxy(tree, prev->lr_offset + prev->lr_length,
+           (off + len) - (prev->lr_offset + prev->lr_length));
 }
 
 /*
  * Check if a reader lock can be grabbed, or wait and recheck until available.
  */
 static void
-zfs_range_lock_reader(znode_t *zp, rl_t *new)
+rangelock_enter_reader(rangelock_t *rl, locked_range_t *new)
 {
-       avl_tree_t *tree = &zp->z_range_avl;
-       rl_t *prev, *next;
+       avl_tree_t *tree = &rl->rl_tree;
+       locked_range_t *prev, *next;
        avl_index_t where;
-       uint64_t off = new->r_off;
-       uint64_t len = new->r_len;
+       uint64_t off = new->lr_offset;
+       uint64_t len = new->lr_length;
 
        /*
         * Look for any writer locks in the range.
@@ -367,21 +394,22 @@ zfs_range_lock_reader(znode_t *zp, rl_t *new)
 retry:
        prev = avl_find(tree, new, &where);
        if (prev == NULL)
-               prev = (rl_t *)avl_nearest(tree, where, AVL_BEFORE);
+               prev = (locked_range_t *)avl_nearest(tree, where, AVL_BEFORE);
 
        /*
         * Check the previous range for a writer lock overlap.
         */
-       if (prev && (off < prev->r_off + prev->r_len)) {
-               if ((prev->r_type == RL_WRITER) || (prev->r_write_wanted)) {
-                       if (!prev->r_read_wanted) {
-                               cv_init(&prev->r_rd_cv, NULL, CV_DEFAULT, NULL);
-                               prev->r_read_wanted = B_TRUE;
+       if (prev && (off < prev->lr_offset + prev->lr_length)) {
+               if ((prev->lr_type == RL_WRITER) || (prev->lr_write_wanted)) {
+                       if (!prev->lr_read_wanted) {
+                               cv_init(&prev->lr_read_cv,
+                                   NULL, CV_DEFAULT, NULL);
+                               prev->lr_read_wanted = B_TRUE;
                        }
-                       cv_wait(&prev->r_rd_cv, &zp->z_range_lock);
+                       cv_wait(&prev->lr_read_cv, &rl->rl_lock);
                        goto retry;
                }
-               if (off + len < prev->r_off + prev->r_len)
+               if (off + len < prev->lr_offset + prev->lr_length)
                        goto got_lock;
        }
 
@@ -389,70 +417,71 @@ retry:
         * Search through the following ranges to see if there's
-        * write lock any overlap.
+        * any write lock overlap.
         */
-       if (prev)
+       if (prev != NULL)
                next = AVL_NEXT(tree, prev);
        else
-               next = (rl_t *)avl_nearest(tree, where, AVL_AFTER);
-       for (; next; next = AVL_NEXT(tree, next)) {
-               if (off + len <= next->r_off)
+               next = (locked_range_t *)avl_nearest(tree, where, AVL_AFTER);
+       for (; next != NULL; next = AVL_NEXT(tree, next)) {
+               if (off + len <= next->lr_offset)
                        goto got_lock;
-               if ((next->r_type == RL_WRITER) || (next->r_write_wanted)) {
-                       if (!next->r_read_wanted) {
-                               cv_init(&next->r_rd_cv, NULL, CV_DEFAULT, NULL);
-                               next->r_read_wanted = B_TRUE;
+               if ((next->lr_type == RL_WRITER) || (next->lr_write_wanted)) {
+                       if (!next->lr_read_wanted) {
+                               cv_init(&next->lr_read_cv,
+                                   NULL, CV_DEFAULT, NULL);
+                               next->lr_read_wanted = B_TRUE;
                        }
-                       cv_wait(&next->r_rd_cv, &zp->z_range_lock);
+                       cv_wait(&next->lr_read_cv, &rl->rl_lock);
                        goto retry;
                }
-               if (off + len <= next->r_off + next->r_len)
+               if (off + len <= next->lr_offset + next->lr_length)
                        goto got_lock;
        }
 
 got_lock:
        /*
         * Add the read lock, which may involve splitting existing
-        * locks and bumping ref counts (r_cnt).
+        * locks and bumping ref counts (lr_count).
         */
-       zfs_range_add_reader(tree, new, prev, where);
+       rangelock_add_reader(tree, new, prev, where);
 }
 
 /*
- * Lock a range (offset, length) as either shared (RL_READER)
- * or exclusive (RL_WRITER). Returns the range lock structure
- * for later unlocking or reduce range (if entire file
- * previously locked as RL_WRITER).
+ * Lock a range (offset, length) as either shared (RL_READER) or exclusive
+ * (RL_WRITER or RL_APPEND).  If RL_APPEND is specified, rl_cb() will convert
+ * it to a RL_WRITER lock (with the offset at the end of the file).  Returns
+ * the range lock structure for later unlocking (or reduce range if the
+ * entire file is locked as RL_WRITER).
  */
-rl_t *
-zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type)
+locked_range_t *
+rangelock_enter(rangelock_t *rl, uint64_t off, uint64_t len,
+    rangelock_type_t type)
 {
-       rl_t *new;
-
        ASSERT(type == RL_READER || type == RL_WRITER || type == RL_APPEND);
 
-       new = kmem_alloc(sizeof (rl_t), KM_SLEEP);
-       new->r_zp = zp;
-       new->r_off = off;
+       locked_range_t *new = kmem_alloc(sizeof (locked_range_t), KM_SLEEP);
+       new->lr_rangelock = rl;
+       new->lr_offset = off;
        if (len + off < off)    /* overflow */
                len = UINT64_MAX - off;
-       new->r_len = len;
-       new->r_cnt = 1; /* assume it's going to be in the tree */
-       new->r_type = type;
-       new->r_proxy = B_FALSE;
-       new->r_write_wanted = B_FALSE;
-       new->r_read_wanted = B_FALSE;
+       new->lr_length = len;
+       new->lr_count = 1; /* assume it's going to be in the tree */
+       new->lr_type = type;
+       new->lr_proxy = B_FALSE;
+       new->lr_write_wanted = B_FALSE;
+       new->lr_read_wanted = B_FALSE;
 
-       mutex_enter(&zp->z_range_lock);
+       mutex_enter(&rl->rl_lock);
        if (type == RL_READER) {
                /*
                 * First check for the usual case of no locks
                 */
-               if (avl_numnodes(&zp->z_range_avl) == 0)
-                       avl_add(&zp->z_range_avl, new);
+               if (avl_numnodes(&rl->rl_tree) == 0)
+                       avl_add(&rl->rl_tree, new);
                else
-                       zfs_range_lock_reader(zp, new);
+                       rangelock_enter_reader(rl, new);
        } else
-               zfs_range_lock_writer(zp, new); /* RL_WRITER or RL_APPEND */
-       mutex_exit(&zp->z_range_lock);
+               rangelock_enter_writer(rl, new); /* RL_WRITER or RL_APPEND */
+       mutex_exit(&rl->rl_lock);
        return (new);
 }
 
@@ -460,10 +489,9 @@ zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len
  * Unlock a reader lock
  */
 static void
-zfs_range_unlock_reader(znode_t *zp, rl_t *remove)
+rangelock_exit_reader(rangelock_t *rl, locked_range_t *remove)
 {
-       avl_tree_t *tree = &zp->z_range_avl;
-       rl_t *rl, *next = NULL;
+       avl_tree_t *tree = &rl->rl_tree;
        uint64_t len;
 
        /*
@@ -473,129 +501,118 @@ zfs_range_unlock_reader(znode_t *zp, rl_t *remove)
         * removed from the tree and replaced by proxies (one or
         * more ranges mapping to the entire range).
         */
-       if (remove->r_cnt == 1) {
+       if (remove->lr_count == 1) {
                avl_remove(tree, remove);
-               if (remove->r_write_wanted) {
-                       cv_broadcast(&remove->r_wr_cv);
-                       cv_destroy(&remove->r_wr_cv);
+               if (remove->lr_write_wanted) {
+                       cv_broadcast(&remove->lr_write_cv);
+                       cv_destroy(&remove->lr_write_cv);
                }
-               if (remove->r_read_wanted) {
-                       cv_broadcast(&remove->r_rd_cv);
-                       cv_destroy(&remove->r_rd_cv);
+               if (remove->lr_read_wanted) {
+                       cv_broadcast(&remove->lr_read_cv);
+                       cv_destroy(&remove->lr_read_cv);
                }
        } else {
-               ASSERT0(remove->r_cnt);
-               ASSERT0(remove->r_write_wanted);
-               ASSERT0(remove->r_read_wanted);
+               ASSERT0(remove->lr_count);
+               ASSERT0(remove->lr_write_wanted);
+               ASSERT0(remove->lr_read_wanted);
                /*
                 * Find start proxy representing this reader lock,
                 * then decrement ref count on all proxies
                 * that make up this range, freeing them as needed.
                 */
-               rl = avl_find(tree, remove, NULL);
-               ASSERT(rl);
-               ASSERT(rl->r_cnt);
-               ASSERT(rl->r_type == RL_READER);
-               for (len = remove->r_len; len != 0; rl = next) {
-                       len -= rl->r_len;
-                       if (len) {
-                               next = AVL_NEXT(tree, rl);
-                               ASSERT(next);
-                               ASSERT(rl->r_off + rl->r_len == next->r_off);
-                               ASSERT(next->r_cnt);
-                               ASSERT(next->r_type == RL_READER);
+               locked_range_t *lr = avl_find(tree, remove, NULL);
+               ASSERT3P(lr, !=, NULL);
+               ASSERT3U(lr->lr_count, !=, 0);
+               ASSERT3U(lr->lr_type, ==, RL_READER);
+               locked_range_t *next = NULL;
+               for (len = remove->lr_length; len != 0; lr = next) {
+                       len -= lr->lr_length;
+                       if (len != 0) {
+                               next = AVL_NEXT(tree, lr);
+                               ASSERT3P(next, !=, NULL);
+                               ASSERT3U(lr->lr_offset + lr->lr_length, ==,
+                                   next->lr_offset);
+                               ASSERT3U(next->lr_count, !=, 0);
+                               ASSERT3U(next->lr_type, ==, RL_READER);
                        }
-                       rl->r_cnt--;
-                       if (rl->r_cnt == 0) {
-                               avl_remove(tree, rl);
-                               if (rl->r_write_wanted) {
-                                       cv_broadcast(&rl->r_wr_cv);
-                                       cv_destroy(&rl->r_wr_cv);
+                       lr->lr_count--;
+                       if (lr->lr_count == 0) {
+                               avl_remove(tree, lr);
+                               if (lr->lr_write_wanted) {
+                                       cv_broadcast(&lr->lr_write_cv);
+                                       cv_destroy(&lr->lr_write_cv);
                                }
-                               if (rl->r_read_wanted) {
-                                       cv_broadcast(&rl->r_rd_cv);
-                                       cv_destroy(&rl->r_rd_cv);
+                               if (lr->lr_read_wanted) {
+                                       cv_broadcast(&lr->lr_read_cv);
+                                       cv_destroy(&lr->lr_read_cv);
                                }
-                               kmem_free(rl, sizeof (rl_t));
+                               kmem_free(lr, sizeof (locked_range_t));
                        }
                }
        }
-       kmem_free(remove, sizeof (rl_t));
+       kmem_free(remove, sizeof (locked_range_t));
 }
 
 /*
  * Unlock range and destroy range lock structure.
  */
 void
-zfs_range_unlock(rl_t *rl)
+rangelock_exit(locked_range_t *lr)
 {
-       znode_t *zp = rl->r_zp;
+       rangelock_t *rl = lr->lr_rangelock;
 
-       ASSERT(rl->r_type == RL_WRITER || rl->r_type == RL_READER);
-       ASSERT(rl->r_cnt == 1 || rl->r_cnt == 0);
-       ASSERT(!rl->r_proxy);
+       ASSERT(lr->lr_type == RL_WRITER || lr->lr_type == RL_READER);
+       ASSERT(lr->lr_count == 1 || lr->lr_count == 0);
+       ASSERT(!lr->lr_proxy);
 
-       mutex_enter(&zp->z_range_lock);
-       if (rl->r_type == RL_WRITER) {
+       mutex_enter(&rl->rl_lock);
+       if (lr->lr_type == RL_WRITER) {
                /* writer locks can't be shared or split */
-               avl_remove(&zp->z_range_avl, rl);
-               mutex_exit(&zp->z_range_lock);
-               if (rl->r_write_wanted) {
-                       cv_broadcast(&rl->r_wr_cv);
-                       cv_destroy(&rl->r_wr_cv);
+               avl_remove(&rl->rl_tree, lr);
+               mutex_exit(&rl->rl_lock);
+               if (lr->lr_write_wanted) {
+                       cv_broadcast(&lr->lr_write_cv);
+                       cv_destroy(&lr->lr_write_cv);
                }
-               if (rl->r_read_wanted) {
-                       cv_broadcast(&rl->r_rd_cv);
-                       cv_destroy(&rl->r_rd_cv);
+               if (lr->lr_read_wanted) {
+                       cv_broadcast(&lr->lr_read_cv);
+                       cv_destroy(&lr->lr_read_cv);
                }
-               kmem_free(rl, sizeof (rl_t));
+               kmem_free(lr, sizeof (locked_range_t));
        } else {
                /*
-                * lock may be shared, let zfs_range_unlock_reader()
+                * lock may be shared, let rangelock_exit_reader()
-                * release the lock and free the rl_t
+                * release the lock and free the locked_range_t
                 */
-               zfs_range_unlock_reader(zp, rl);
-               mutex_exit(&zp->z_range_lock);
+               rangelock_exit_reader(rl, lr);
+               mutex_exit(&rl->rl_lock);
        }
 }
 
 /*
  * Reduce range locked as RL_WRITER from whole file to specified range.
- * Asserts the whole file is exclusivly locked and so there's only one
+ * Asserts the whole file is exclusively locked and so there's only one
  * entry in the tree.
  */
 void

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to