Author: kib
Date: Wed May 30 16:06:38 2012
New Revision: 236317
URL: http://svn.freebsd.org/changeset/base/236317

Log:
  Add a rangelock implementation, intended to be used for range-locking
  the i/o regions of the vnode data space. The implementation is quite
  simple-minded: it uses a list of lock requests, ordered by
  arrival time. Each request may be for read or for write. The
  implementation is fair (FIFO).
  
  MFC after:     2 months

Added:
  head/sys/kern/kern_rangelock.c   (contents, props changed)
  head/sys/sys/rangelock.h   (contents, props changed)
Modified:
  head/sys/conf/files
  head/sys/kern/kern_thread.c
  head/sys/kern/vfs_subr.c
  head/sys/sys/proc.h
  head/sys/sys/vnode.h

Modified: head/sys/conf/files
==============================================================================
--- head/sys/conf/files Wed May 30 16:04:10 2012        (r236316)
+++ head/sys/conf/files Wed May 30 16:06:38 2012        (r236317)
@@ -2562,6 +2562,7 @@ kern/kern_priv.c          standard
 kern/kern_proc.c               standard
 kern/kern_prot.c               standard
 kern/kern_racct.c              standard
+kern/kern_rangelock.c          standard
 kern/kern_rctl.c               standard
 kern/kern_resource.c           standard
 kern/kern_rmlock.c             standard

Added: head/sys/kern/kern_rangelock.c
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/sys/kern/kern_rangelock.c      Wed May 30 16:06:38 2012        
(r236317)
@@ -0,0 +1,246 @@
+/*-
+ * Copyright (c) 2009 Konstantin Belousov <k...@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/rangelock.h>
+#include <sys/systm.h>
+
+#include <vm/uma.h>
+
+/*
+ * A single queued range lock request, linked on the rangelock's
+ * rl_waiters list in FIFO (arrival) order.
+ */
+struct rl_q_entry {
+       TAILQ_ENTRY(rl_q_entry) rl_q_link;
+       /*
+        * Requested byte range.  The end offset is exclusive, as used
+        * by the overlap check in rangelock_incompatible().
+        */
+       off_t           rl_q_start, rl_q_end;
+       /*
+        * RL_LOCK_READ or RL_LOCK_WRITE, plus RL_LOCK_GRANTED once the
+        * request has been granted.
+        */
+       int             rl_q_flags;
+};
+
+/* UMA zone backing all rl_q_entry allocations. */
+static uma_zone_t rl_entry_zone;
+
+/*
+ * One-time boot initialization of the rl_q_entry zone, invoked by the
+ * SYSINIT below at SI_SUB_LOCK time.
+ */
+static void
+rangelock_sys_init(void)
+{
+
+       rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
+           NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+}
+SYSINIT(vfs, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);
+
+/*
+ * Allocate a queue entry.  M_WAITOK allows the allocation to sleep,
+ * so callers must not hold the rangelock interlock here.
+ */
+static struct rl_q_entry *
+rlqentry_alloc(void)
+{
+
+       return (uma_zalloc(rl_entry_zone, M_WAITOK));
+}
+
+
+/*
+ * Free a queue entry.  NOTE(review): thread_fini() passes a
+ * possibly-NULL td_rlqe here, which relies on uma_zfree() tolerating a
+ * NULL item — confirm against the UMA implementation.
+ */
+void
+rlqentry_free(struct rl_q_entry *rleq)
+{
+
+       uma_zfree(rl_entry_zone, rleq);
+}
+
+
+/*
+ * Initialize an embedded rangelock to the empty state: no waiters and
+ * no blocked request.
+ */
+void
+rangelock_init(struct rangelock *lock)
+{
+
+       TAILQ_INIT(&lock->rl_waiters);
+       lock->rl_currdep = NULL;
+}
+
+
+/*
+ * Destroy an embedded rangelock.  All requests must have been
+ * unlocked by now; assert that no waiters remain queued.
+ */
+void
+rangelock_destroy(struct rangelock *lock)
+{
+
+       KASSERT(TAILQ_EMPTY(&lock->rl_waiters), ("Dangling waiters"));
+}
+
+/*
+ * Verifies the supplied rl_q_entries for compatibility.  Returns true
+ * if the rangelock queue entries are not compatible, false if they are.
+ *
+ * Two entries are compatible if their ranges do not overlap, or both
+ * entries are for read.
+ */
+static int
+rangelock_incompatible(const struct rl_q_entry *e1,
+    const struct rl_q_entry *e2)
+{
+
+       /* Two read requests never conflict, regardless of range. */
+       if ((e1->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ &&
+           (e2->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ)
+               return (0);
+       /* Otherwise they conflict iff the half-open ranges overlap. */
+       if (e1->rl_q_start < e2->rl_q_end && e1->rl_q_end > e2->rl_q_start)
+               return (1);
+       return (0);
+}
+
+/*
+ * Recalculate the lock->rl_currdep after an unlock.
+ *
+ * rl_currdep points to the first waiter that cannot be granted.  Scan
+ * forward from it: an entry may be granted only if it is compatible
+ * with every request queued ahead of it; the first entry that
+ * conflicts with a predecessor becomes the new rl_currdep.  Finally,
+ * grant (and wake) every not-yet-granted entry queued ahead of the
+ * new rl_currdep.
+ *
+ * Called with the interlock protecting the rangelock held.
+ */
+static void
+rangelock_calc_block(struct rangelock *lock)
+{
+       struct rl_q_entry *entry, *entry1, *whead;
+
+       /*
+        * A request at the head of the queue has no predecessors and
+        * thus cannot be blocked; advance rl_currdep past it before
+        * scanning.
+        */
+       if (lock->rl_currdep == TAILQ_FIRST(&lock->rl_waiters) &&
+           lock->rl_currdep != NULL)
+               lock->rl_currdep = TAILQ_NEXT(lock->rl_currdep, rl_q_link);
+       for (entry = lock->rl_currdep; entry != NULL;
+            entry = TAILQ_NEXT(entry, rl_q_link)) {
+               /* Check entry against every request queued before it. */
+               TAILQ_FOREACH(entry1, &lock->rl_waiters, rl_q_link) {
+                       if (rangelock_incompatible(entry, entry1))
+                               goto out;
+                       if (entry1 == entry)
+                               break;
+               }
+       }
+out:
+       lock->rl_currdep = entry;
+       /* Grant and wake every request ahead of the first blocked one. */
+       TAILQ_FOREACH(whead, &lock->rl_waiters, rl_q_link) {
+               if (whead == lock->rl_currdep)
+                       break;
+               if (!(whead->rl_q_flags & RL_LOCK_GRANTED)) {
+                       whead->rl_q_flags |= RL_LOCK_GRANTED;
+                       wakeup(whead);
+               }
+       }
+}
+
+/*
+ * Remove the request from the queue, recalculate which waiters may
+ * now proceed, and drop the interlock.
+ *
+ * The interlock 'ilk' must be held on entry; it is unlocked before
+ * returning.  The removed entry is cached in curthread->td_rlqe for
+ * reuse by a later rangelock_enqueue(), or freed when the cache slot
+ * is already occupied.
+ */
+static void
+rangelock_unlock_locked(struct rangelock *lock, struct rl_q_entry *entry,
+    struct mtx *ilk)
+{
+
+       MPASS(lock != NULL && entry != NULL && ilk != NULL);
+       mtx_assert(ilk, MA_OWNED);
+       /* A still-blocked request cannot be the one being unlocked. */
+       KASSERT(entry != lock->rl_currdep, ("stuck currdep"));
+
+       TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
+       rangelock_calc_block(lock);
+       mtx_unlock(ilk);
+       if (curthread->td_rlqe == NULL)
+               curthread->td_rlqe = entry;
+       else
+               rlqentry_free(entry);
+}
+
+/*
+ * Unlock the range identified by the cookie returned from a lock
+ * routine.  Acquires the interlock 'ilk'; it is released inside
+ * rangelock_unlock_locked().
+ */
+void
+rangelock_unlock(struct rangelock *lock, void *cookie, struct mtx *ilk)
+{
+
+       MPASS(lock != NULL && cookie != NULL && ilk != NULL);
+
+       mtx_lock(ilk);
+       rangelock_unlock_locked(lock, cookie, ilk);
+}
+
+/*
+ * Unlock the sub-range of granted lock.
+ *
+ * Shrinks the granted range so that only [start, end) stays held; the
+ * supplied start must equal the granted range's start.  Returns the
+ * cookie for the remaining range, or NULL when 'end' matches the
+ * granted end and the lock is dropped entirely.
+ */
+void *
+rangelock_unlock_range(struct rangelock *lock, void *cookie, off_t start,
+    off_t end, struct mtx *ilk)
+{
+       struct rl_q_entry *entry;
+
+       MPASS(lock != NULL && cookie != NULL && ilk != NULL);
+       entry = cookie;
+       KASSERT(entry->rl_q_flags & RL_LOCK_GRANTED,
+           ("Unlocking non-granted lock"));
+       KASSERT(entry->rl_q_start == start, ("wrong start"));
+       KASSERT(entry->rl_q_end >= end, ("wrong end"));
+
+       mtx_lock(ilk);
+       if (entry->rl_q_end == end) {
+               /* Nothing would remain held; do a full unlock. */
+               rangelock_unlock_locked(lock, cookie, ilk);
+               return (NULL);
+       }
+       /* Trim the range and let newly-compatible waiters proceed. */
+       entry->rl_q_end = end;
+       rangelock_calc_block(lock);
+       mtx_unlock(ilk);
+       return (cookie);
+}
+
+/*
+ * Add the lock request to the queue of the pending requests for
+ * rangelock.  Sleep until the request can be granted.
+ *
+ * The entry is taken from the per-thread cache td_rlqe when one is
+ * available (the cache is refilled by rangelock_unlock_locked());
+ * otherwise a fresh entry is allocated, possibly sleeping.  The
+ * request is appended to the FIFO queue and the thread sleeps on the
+ * entry until rangelock_calc_block() grants it.  The entry pointer is
+ * returned as the opaque cookie for the unlock routines.
+ */
+static void *
+rangelock_enqueue(struct rangelock *lock, off_t start, off_t end, int mode,
+    struct mtx *ilk)
+{
+       struct rl_q_entry *entry;
+       struct thread *td;
+
+       MPASS(lock != NULL && ilk != NULL);
+
+       td = curthread;
+       /* Prefer the cached per-thread entry to avoid an allocation. */
+       if (td->td_rlqe != NULL) {
+               entry = td->td_rlqe;
+               td->td_rlqe = NULL;
+       } else
+               entry = rlqentry_alloc();
+       MPASS(entry != NULL);
+       entry->rl_q_flags = mode;
+       entry->rl_q_start = start;
+       entry->rl_q_end = end;
+
+       mtx_lock(ilk);
+       /*
+        * XXXKIB TODO. Check that a thread does not try to enqueue a
+        * lock that is incompatible with another request from the same
+        * thread.
+        */
+
+       TAILQ_INSERT_TAIL(&lock->rl_waiters, entry, rl_q_link);
+       /*
+        * A queue with no blocked request gets this new entry as the
+        * starting point for the grant scan below.
+        */
+       if (lock->rl_currdep == NULL)
+               lock->rl_currdep = entry;
+       rangelock_calc_block(lock);
+       /* Wait until rangelock_calc_block() marks us granted. */
+       while (!(entry->rl_q_flags & RL_LOCK_GRANTED))
+               msleep(entry, ilk, 0, "range", 0);
+       mtx_unlock(ilk);
+       return (entry);
+}
+
+/*
+ * Acquire a read lock on the byte range [start, end), sleeping until
+ * it can be granted.  Returns the opaque cookie for the unlock calls.
+ */
+void *
+rangelock_rlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
+{
+
+       return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk));
+}
+
+
+/*
+ * Acquire a write lock on the byte range [start, end), sleeping until
+ * it can be granted.  Returns the opaque cookie for the unlock calls.
+ */
+void *
+rangelock_wlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
+{
+
+       return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk));
+}

Modified: head/sys/kern/kern_thread.c
==============================================================================
--- head/sys/kern/kern_thread.c Wed May 30 16:04:10 2012        (r236316)
+++ head/sys/kern/kern_thread.c Wed May 30 16:06:38 2012        (r236317)
@@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
+#include <sys/rangelock.h>
 #include <sys/resourcevar.h>
 #include <sys/sdt.h>
 #include <sys/smp.h>
@@ -205,6 +206,7 @@ thread_init(void *mem, int size, int fla
 
        td->td_sleepqueue = sleepq_alloc();
        td->td_turnstile = turnstile_alloc();
+       td->td_rlqe = NULL;
        EVENTHANDLER_INVOKE(thread_init, td);
        td->td_sched = (struct td_sched *)&td[1];
        umtx_thread_init(td);
@@ -222,6 +224,7 @@ thread_fini(void *mem, int size)
 
        td = (struct thread *)mem;
        EVENTHANDLER_INVOKE(thread_fini, td);
+       rlqentry_free(td->td_rlqe);
        turnstile_free(td->td_turnstile);
        sleepq_free(td->td_sleepqueue);
        umtx_thread_fini(td);

Modified: head/sys/kern/vfs_subr.c
==============================================================================
--- head/sys/kern/vfs_subr.c    Wed May 30 16:04:10 2012        (r236316)
+++ head/sys/kern/vfs_subr.c    Wed May 30 16:06:38 2012        (r236317)
@@ -1027,6 +1027,7 @@ alloc:
                if ((mp->mnt_kern_flag & MNTK_NOKNOTE) != 0)
                        vp->v_vflag |= VV_NOKNOTE;
        }
+       rangelock_init(&vp->v_rl);
 
        *vpp = vp;
        return (0);
@@ -2468,6 +2469,7 @@ vdropl(struct vnode *vp)
        /* XXX Elsewhere we detect an already freed vnode via NULL v_op. */
        vp->v_op = NULL;
 #endif
+       rangelock_destroy(&vp->v_rl);
        lockdestroy(vp->v_vnlock);
        mtx_destroy(&vp->v_interlock);
        mtx_destroy(BO_MTX(bo));

Modified: head/sys/sys/proc.h
==============================================================================
--- head/sys/sys/proc.h Wed May 30 16:04:10 2012        (r236316)
+++ head/sys/sys/proc.h Wed May 30 16:06:38 2012        (r236317)
@@ -213,6 +213,7 @@ struct thread {
        struct seltd    *td_sel;        /* Select queue/channel. */
        struct sleepqueue *td_sleepqueue; /* (k) Associated sleep queue. */
        struct turnstile *td_turnstile; /* (k) Associated turnstile. */
+       struct rl_q_entry *td_rlqe;     /* (k) Associated range lock entry. */
        struct umtx_q   *td_umtxq;      /* (c?) Link for when we're blocked. */
        lwpid_t         td_tid;         /* (b) Thread ID. */
        sigqueue_t      td_sigqueue;    /* (c) Sigs arrived, not delivered. */

Added: head/sys/sys/rangelock.h
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/sys/sys/rangelock.h    Wed May 30 16:06:38 2012        (r236317)
@@ -0,0 +1,78 @@
+/*-
+ * Copyright (c) 2009 Konstantin Belousov <k...@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef        _SYS_RANGELOCK_H
+#define        _SYS_RANGELOCK_H
+
+#include <sys/queue.h>
+
+#define        RL_LOCK_READ            0x0001
+#define        RL_LOCK_WRITE           0x0002
+#define        RL_LOCK_TYPE_MASK       0x0003
+#define        RL_LOCK_GRANTED         0x0004
+
+struct rl_q_entry;
+
+/*
+ * The structure representing the range lock.  Caller may request
+ * read or write access to the range of bytes. Access is granted if
+ * all existing lock owners are compatible with the request. Two lock
+ * owners are compatible if their ranges do not overlap, or both
+ * owners are for read.
+ *
+ * Access to the structure itself is synchronized with the externally
+ * supplied mutex.
+ *
+ * rl_waiters is the queue of lock requests in the order of arrival.
+ * rl_currdep is the first lock request that cannot be granted now due
+ * to the preceding requests conflicting with it.
+ */
+struct rangelock {
+       TAILQ_HEAD(, rl_q_entry) rl_waiters;    /* All requests, FIFO. */
+       struct rl_q_entry       *rl_currdep;    /* First blocked request. */
+};
+
+#ifdef _KERNEL
+
+struct mtx;
+
+void    rangelock_init(struct rangelock *lock);
+void    rangelock_destroy(struct rangelock *lock);
+void    rangelock_unlock(struct rangelock *lock, void *cookie,
+           struct mtx *ilk);
+void   *rangelock_unlock_range(struct rangelock *lock, void *cookie,
+           off_t start, off_t end, struct mtx *ilk);
+void   *rangelock_rlock(struct rangelock *lock, off_t start, off_t end,
+           struct mtx *ilk);
+void   *rangelock_wlock(struct rangelock *lock, off_t start, off_t end,
+           struct mtx *ilk);
+void    rlqentry_free(struct rl_q_entry *rlqe);
+
+#endif /* _KERNEL */
+
+#endif /* _SYS_RANGELOCK_H */

Modified: head/sys/sys/vnode.h
==============================================================================
--- head/sys/sys/vnode.h        Wed May 30 16:04:10 2012        (r236316)
+++ head/sys/sys/vnode.h        Wed May 30 16:06:38 2012        (r236317)
@@ -38,6 +38,7 @@
 #include <sys/lock.h>
 #include <sys/lockmgr.h>
 #include <sys/mutex.h>
+#include <sys/rangelock.h>
 #include <sys/selinfo.h>
 #include <sys/uio.h>
 #include <sys/acl.h>
@@ -165,6 +166,7 @@ struct vnode {
        struct vpollinfo *v_pollinfo;           /* i Poll events, p for *v_pi */
        struct label *v_label;                  /* MAC label for vnode */
        struct lockf *v_lockf;          /* Byte-level advisory lock list */
+       struct rangelock v_rl;                  /* Byte-range lock */
 };
 
 #endif /* defined(_KERNEL) || defined(_KVM_VNODE) */
@@ -679,6 +681,15 @@ int        vn_extattr_rm(struct vnode *vp, int 
 int    vn_vget_ino(struct vnode *vp, ino_t ino, int lkflags,
            struct vnode **rvp);
 
+#define        vn_rangelock_unlock(vp, cookie)                                 
\
+       rangelock_unlock(&(vp)->v_rl, (cookie), VI_MTX(vp))
+#define        vn_rangelock_unlock_range(vp, cookie, start, end)               
\
+       rangelock_unlock_range(&(vp)->v_rl, (cookie), (start), (end),   \
+           VI_MTX(vp))
+#define        vn_rangelock_rlock(vp, start, end)                              
\
+       rangelock_rlock(&(vp)->v_rl, (start), (end), VI_MTX(vp))
+#define        vn_rangelock_wlock(vp, start, end)                              
\
+       rangelock_wlock(&(vp)->v_rl, (start), (end), VI_MTX(vp))
 
 int    vfs_cache_lookup(struct vop_lookup_args *ap);
 void   vfs_timestamp(struct timespec *);
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to