On Tue, Jul 07, 2020 at 03:04:10PM +0200, Peter Zijlstra wrote:

> Anyway, let me muck around with that a bit.

How's this? It removes a level of indirection and a bunch of repetition.

It doesn't provide a separate SEQCNT_LOCKTYPE_ZERO() for each LOCKTYPE; the
one macro works for any LOCKTYPE.
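
For example, static initialization of a spinlock-backed counter would then
look something like this (rough, untested sketch; 'foo_lock' / 'foo_seqcount'
are made-up names):

	static spinlock_t foo_lock = __SPIN_LOCK_UNLOCKED(foo_lock);
	static seqcount_spinlock_t foo_seqcount =
		SEQCNT_LOCKTYPE_ZERO(foo_seqcount, &foo_lock);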

It's also more than 200 lines shorter.
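
For reference, the write side then reads the same regardless of the associated
lock type; rough, untested sketch with made-up names, assuming a
seqcount_mutex_t set up with seqcount_mutex_init(&bar_seqcount, &bar_lock):

	mutex_lock(&bar_lock);
	write_seqcount_begin(&bar_seqcount);	/* lockdep asserts bar_lock is held;
						 * preemption disabled for sleeping locks */
	/* ... update the protected data ... */
	write_seqcount_end(&bar_seqcount);	/* re-enables preemption */
	mutex_unlock(&bar_lock);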

---
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 8b97204f35a7..2b599cf03db4 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -1,36 +1,15 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __LINUX_SEQLOCK_H
 #define __LINUX_SEQLOCK_H
+
 /*
- * Reader/writer consistent mechanism without starving writers. This type of
- * lock for data where the reader wants a consistent set of information
- * and is willing to retry if the information changes. There are two types
- * of readers:
- * 1. Sequence readers which never block a writer but they may have to retry
- *    if a writer is in progress by detecting change in sequence number.
- *    Writers do not wait for a sequence reader.
- * 2. Locking readers which will wait if a writer or another locking reader
- *    is in progress. A locking reader in progress will also block a writer
- *    from going forward. Unlike the regular rwlock, the read lock here is
- *    exclusive so that only one locking reader can get it.
- *
- * This is not as cache friendly as brlock. Also, this may not work well
- * for data that contains pointers, because any writer could
- * invalidate a pointer that a reader was following.
- *
- * Expected non-blocking reader usage:
- *     do {
- *         seq = read_seqbegin(&foo);
- *     ...
- *      } while (read_seqretry(&foo, seq));
+ * seqcount_t / seqlock_t - a reader-writer consistency mechanism with
+ * lockless readers (read-only retry loops), and no writer starvation.
  *
+ * See Documentation/locking/seqlock.rst for full description.
  *
- * On non-SMP the spin locks disappear but the writer still needs
- * to increment the sequence variables because an interrupt routine could
- * change the state of the data.
- *
- * Based on x86_64 vsyscall gettimeofday 
- * by Keith Owens and Andrea Arcangeli
+ * Copyrights:
+ * - Based on x86_64 vsyscall gettimeofday: Keith Owens, Andrea Arcangeli
  */
 
 #include <linux/spinlock.h>
@@ -41,8 +20,8 @@
 #include <asm/processor.h>
 
 /*
- * The seqlock interface does not prescribe a precise sequence of read
- * begin/retry/end. For readers, typically there is a call to
+ * The seqlock seqcount_t interface does not prescribe a precise sequence of
+ * read begin/retry/end. For readers, typically there is a call to
  * read_seqcount_begin() and read_seqcount_retry(), however, there are more
  * esoteric cases which do not follow this pattern.
  *
@@ -56,10 +35,28 @@
 #define KCSAN_SEQLOCK_REGION_MAX 1000
 
 /*
- * Version using sequence counter only.
- * This can be used when code has its own mutex protecting the
- * updating starting before the write_seqcountbeqin() and ending
- * after the write_seqcount_end().
+ * Sequence counters (seqcount_t)
+ *
+ * This is the raw counting mechanism, without any writer protection.
+ *
+ * Write side critical sections must be serialized and non-preemptible.
+ *
+ * If readers can be invoked from hardirq or softirq contexts,
+ * interrupts or bottom halves must also be respectively disabled before
+ * entering the write section.
+ *
+ * This mechanism can't be used if the protected data contains pointers,
+ * as the writer can invalidate a pointer that a reader is following.
+ *
+ * If the write serialization mechanism is one of the common kernel
+ * locking primitives, use a sequence counter with associated lock
+ * (seqcount_LOCKTYPE_t) instead.
+ *
+ * If it's desired to automatically handle the sequence counter writer
+ * serialization and non-preemptibility requirements, use a sequential
+ * lock (seqlock_t) instead.
+ *
+ * See Documentation/locking/seqlock.rst
  */
 typedef struct seqcount {
        unsigned sequence;
@@ -82,6 +79,10 @@ static inline void __seqcount_init(seqcount_t *s, const char *name,
 # define SEQCOUNT_DEP_MAP_INIT(lockname) \
                .dep_map = { .name = #lockname } \
 
+/**
+ * seqcount_init() - runtime initializer for seqcount_t
+ * @s: Pointer to the &typedef seqcount_t instance
+ */
 # define seqcount_init(s)                              \
        do {                                            \
                static struct lock_class_key __key;     \
@@ -105,12 +106,122 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
 # define seqcount_lockdep_reader_access(x)
 #endif
 
-#define SEQCNT_ZERO(lockname) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(lockname)}
+/**
+ * SEQCNT_ZERO() - static initializer for seqcount_t
+ * @name: Name of the &typedef seqcount_t instance
+ */
+#define SEQCNT_ZERO(name) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(name) }
+
+/*
+ * Sequence counters with associated locks (seqcount_LOCKTYPE_t)
+ *
+ * A sequence counter which associates the lock used for writer
+ * serialization at initialization time. This enables lockdep to validate
+ * that the write side critical section is properly serialized.
+ *
+ * For associated locks which do not implicitly disable preemption,
+ * preemption protection is enforced in the write side function.
+ *
+ * See Documentation/locking/seqlock.rst
+ */
+
+#ifdef CONFIG_LOCKDEP
+#define __SEQCOUNT_LOCKDEP(expr)       expr
+#else
+#define __SEQCOUNT_LOCKDEP(expr)
+#endif
+
+#define SEQCOUNT_LOCKTYPE(name, locktype)                              \
+typedef struct seqcount_##name {                                       \
+       seqcount_t      seqcount;                                       \
+       __SEQCOUNT_LOCKDEP(locktype *lock);                             \
+} seqcount_##name##_t;                                                 \
+                                                                       \
+static __always_inline void                                            \
+seqcount_##name##_init(seqcount_##name##_t *s, locktype *l)            \
+{                                                                      \
+       seqcount_init(&s->seqcount);                                    \
+       __SEQCOUNT_LOCKDEP(s->lock = l);                                \
+}
+
+#define SEQCNT_LOCKTYPE_ZERO(_name, _lock) {                           \
+       .seqcount = SEQCNT_ZERO(_name.seqcount),                        \
+       __SEQCOUNT_LOCKDEP(.lock = (_lock))                             \
+}
+
+#include <linux/spinlock.h>
+#include <linux/ww_mutex.h>
+
+SEQCOUNT_LOCKTYPE(raw_spinlock, raw_spinlock_t)
+SEQCOUNT_LOCKTYPE(spinlock, spinlock_t)
+SEQCOUNT_LOCKTYPE(rwlock, rwlock_t)
+SEQCOUNT_LOCKTYPE(mutex, struct mutex)
+SEQCOUNT_LOCKTYPE(ww_mutex, struct ww_mutex)
+
+#define __to_seqcount_t(s)     (seqcount_t *)(s)
+
+/*
+ *     seqcount_LOCKTYPE_t -- write APIs
+ *
+ * For associated lock types which do not implicitly disable preemption,
+ * enforce preemption protection in the write side functions.
+ *
+ * Never use lockdep for the raw write variants.
+ */
+
+#define __associated_lock_is_preemptible(s)                            \
+({                                                                     \
+       bool ret;                                                       \
+                                                                       \
+       if (__same_type(*(s), seqcount_t) ||                            \
+           __same_type(*(s), seqcount_raw_spinlock_t)) {               \
+               ret = false;                                            \
+       } else if (__same_type(*(s), seqcount_spinlock_t) ||            \
+                  __same_type(*(s), seqcount_rwlock_t)) {              \
+               ret = IS_BUILTIN(CONFIG_PREEMPT_RT);                    \
+       } else if (__same_type(*(s), seqcount_mutex_t) ||               \
+                  __same_type(*(s), seqcount_ww_mutex_t)) {            \
+               ret = true;                                             \
+       } else                                                          \
+               BUILD_BUG_ON_MSG(1, "Unknown seqcount type");           \
+                                                                       \
+       ret;                                                            \
+})
+
+#ifdef CONFIG_LOCKDEP
+
+#define __assert_associated_lock_held(s)                               \
+do {                                                                   \
+       if (__same_type(*(s), seqcount_t))                              \
+               break;                                                  \
+                                                                       \
+       if (__same_type(*(s), seqcount_spinlock_t))                     \
+               lockdep_assert_held(((seqcount_spinlock_t *)(s))->lock);\
+       else if (__same_type(*(s), seqcount_raw_spinlock_t))            \
+               lockdep_assert_held(((seqcount_raw_spinlock_t *)(s))->lock);   \
+       else if (__same_type(*(s), seqcount_rwlock_t))                  \
+               lockdep_assert_held_write(((seqcount_rwlock_t *)(s))->lock);   \
+       else if (__same_type(*(s), seqcount_mutex_t))                   \
+               lockdep_assert_held(((seqcount_mutex_t *)(s))->lock);   \
+       else if (__same_type(*(s), seqcount_ww_mutex_t))                \
+               lockdep_assert_held(&((seqcount_ww_mutex_t *)(s))->lock->base); \
+       else                                                            \
+               BUILD_BUG_ON_MSG(1, "Unknown seqcount type");           \
+} while (0)
+
+#else
+
+#define __assert_associated_lock_held(s)                               \
+do {                                                                   \
+       (void) __to_seqcount_t(s);                                      \
+} while (0)
+
+#endif /* CONFIG_LOCKDEP */
 
 
 /**
- * __read_seqcount_begin - begin a seq-read critical section (without barrier)
- * @s: pointer to seqcount_t
+ * __read_seqcount_begin() - begin a seqcount read section (without barrier)
+ * @s: Pointer to &typedef seqcount_t or any of the seqcount_locktype_t variants
  * Returns: count to be passed to read_seqcount_retry
  *
  * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb()
@@ -121,7 +232,9 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
  * Use carefully, only in critical code, and comment how the barrier is
  * provided.
  */
-static inline unsigned __read_seqcount_begin(const seqcount_t *s)
+#define __read_seqcount_begin(s)       do___read_seqcount_begin(__to_seqcount_t(s))
+
+static inline unsigned do___read_seqcount_begin(const seqcount_t *s)
 {
        unsigned ret;
 
@@ -136,15 +249,18 @@ static inline unsigned __read_seqcount_begin(const seqcount_t *s)
 }
 
 /**
- * raw_read_seqcount - Read the raw seqcount
- * @s: pointer to seqcount_t
+ * raw_read_seqcount() - Read the seqcount raw counter value
+ * @s: Pointer to &typedef seqcount_t or any of the seqcount_locktype_t variants
  * Returns: count to be passed to read_seqcount_retry
  *
  * raw_read_seqcount opens a read critical section of the given
- * seqcount without any lockdep checking and without checking or
- * masking the LSB. Calling code is responsible for handling that.
+ * seqcount_t, without any lockdep checks and without checking or
+ * masking the sequence counter LSB. Calling code is responsible for
+ * handling that.
  */
-static inline unsigned raw_read_seqcount(const seqcount_t *s)
+#define raw_read_seqcount(s)   do_raw_read_seqcount(__to_seqcount_t(s))
+
+static inline unsigned do_raw_read_seqcount(const seqcount_t *s)
 {
        unsigned ret = READ_ONCE(s->sequence);
        smp_rmb();
@@ -153,42 +269,46 @@ static inline unsigned raw_read_seqcount(const seqcount_t *s)
 }
 
 /**
- * raw_read_seqcount_begin - start seq-read critical section w/o lockdep
- * @s: pointer to seqcount_t
+ * raw_read_seqcount_begin() - start a seqcount read section w/o lockdep
+ * @s: Pointer to &typedef seqcount_t or any of the seqcount_locktype_t variants
  * Returns: count to be passed to read_seqcount_retry
  *
  * raw_read_seqcount_begin opens a read critical section of the given
- * seqcount, but without any lockdep checking. Validity of the critical
- * section is tested by checking read_seqcount_retry function.
+ * seqcount_t, but without any lockdep checking. Validity of the read
+ * section must be checked with read_seqcount_retry().
  */
-static inline unsigned raw_read_seqcount_begin(const seqcount_t *s)
+#define raw_read_seqcount_begin(s)     do_raw_read_seqcount_begin(__to_seqcount_t(s))
+
+static inline unsigned do_raw_read_seqcount_begin(const seqcount_t *s)
 {
-       unsigned ret = __read_seqcount_begin(s);
+       unsigned ret = do___read_seqcount_begin(s);
        smp_rmb();
        return ret;
 }
 
 /**
- * read_seqcount_begin - begin a seq-read critical section
- * @s: pointer to seqcount_t
+ * read_seqcount_begin() - start a seqcount read critical section
+ * @s: pointer to &typedef seqcount_t or any of the seqcount_locktype_t variants
  * Returns: count to be passed to read_seqcount_retry
  *
- * read_seqcount_begin opens a read critical section of the given seqcount.
- * Validity of the critical section is tested by checking read_seqcount_retry
- * function.
+ * read_seqcount_begin opens a read critical section of the given
+ * seqcount_t. Validity of the read section must be checked with
+ * read_seqcount_retry().
  */
-static inline unsigned read_seqcount_begin(const seqcount_t *s)
+#define read_seqcount_begin(s) do_read_seqcount_begin(__to_seqcount_t(s))
+
+static inline unsigned do_read_seqcount_begin(const seqcount_t *s)
 {
        seqcount_lockdep_reader_access(s);
-       return raw_read_seqcount_begin(s);
+       return do_raw_read_seqcount_begin(s);
 }
 
 /**
- * raw_seqcount_begin - begin a seq-read critical section
- * @s: pointer to seqcount_t
+ * raw_seqcount_begin() - begin a seq-read critical section
+ * @s: pointer to &typedef seqcount_t or any of the seqcount_locktype_t variants
  * Returns: count to be passed to read_seqcount_retry
  *
- * raw_seqcount_begin opens a read critical section of the given seqcount.
+ * raw_seqcount_begin opens a read critical section of the given seqcount_t.
  * Validity of the critical section is tested by checking read_seqcount_retry
  * function.
  *
@@ -197,7 +317,9 @@ static inline unsigned read_seqcount_begin(const seqcount_t *s)
  * read_seqcount_retry() instead of stabilizing at the beginning of the
  * critical section.
  */
-static inline unsigned raw_seqcount_begin(const seqcount_t *s)
+#define raw_seqcount_begin(s)  do_raw_seqcount_begin(__to_seqcount_t(s))
+
+static inline unsigned do_raw_seqcount_begin(const seqcount_t *s)
 {
        unsigned ret = READ_ONCE(s->sequence);
        smp_rmb();
@@ -206,8 +328,8 @@ static inline unsigned raw_seqcount_begin(const seqcount_t *s)
 }
 
 /**
- * __read_seqcount_retry - end a seq-read critical section (without barrier)
- * @s: pointer to seqcount_t
+ * __read_seqcount_retry() - end a seq-read critical section (without barrier)
+ * @s: pointer to &typedef seqcount_t or any of the seqcount_locktype_t variants
  * @start: count, from read_seqcount_begin
  * Returns: 1 if retry is required, else 0
  *
@@ -219,38 +341,56 @@ static inline unsigned raw_seqcount_begin(const seqcount_t *s)
  * Use carefully, only in critical code, and comment how the barrier is
  * provided.
  */
-static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
+#define __read_seqcount_retry(s, start)        do___read_seqcount_retry(__to_seqcount_t(s), start)
+
+static inline int do___read_seqcount_retry(const seqcount_t *s, unsigned start)
 {
        kcsan_atomic_next(0);
        return unlikely(READ_ONCE(s->sequence) != start);
 }
 
 /**
- * read_seqcount_retry - end a seq-read critical section
- * @s: pointer to seqcount_t
+ * read_seqcount_retry() - end a seq-read critical section
+ * @s: pointer to &typedef seqcount_t or any of the seqcount_locktype_t variants
  * @start: count, from read_seqcount_begin
  * Returns: 1 if retry is required, else 0
  *
- * read_seqcount_retry closes a read critical section of the given seqcount.
+ * read_seqcount_retry closes a read critical section of given seqcount_t.
  * If the critical section was invalid, it must be ignored (and typically
  * retried).
  */
-static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
+#define read_seqcount_retry(s, start)  do_read_seqcount_retry(__to_seqcount_t(s), start)
+
+static inline int do_read_seqcount_retry(const seqcount_t *s, unsigned start)
 {
        smp_rmb();
-       return __read_seqcount_retry(s, start);
+       return do___read_seqcount_retry(s, start);
 }
 
+#define raw_write_seqcount_begin(s)                                    \
+do {                                                                   \
+       if (__associated_lock_is_preemptible(s))                        \
+               preempt_disable();                                      \
+                                                                       \
+       do_raw_write_seqcount_begin(__to_seqcount_t(s));                \
+} while (0)
 
-
-static inline void raw_write_seqcount_begin(seqcount_t *s)
+static inline void do_raw_write_seqcount_begin(seqcount_t *s)
 {
        kcsan_nestable_atomic_begin();
        s->sequence++;
        smp_wmb();
 }
 
-static inline void raw_write_seqcount_end(seqcount_t *s)
+#define raw_write_seqcount_end(s)                                      \
+do {                                                                   \
+       do_raw_write_seqcount_end(__to_seqcount_t(s));                  \
+                                                                       \
+       if (__associated_lock_is_preemptible(s))                        \
+               preempt_enable();                                       \
+} while (0)
+
+static inline void do_raw_write_seqcount_end(seqcount_t *s)
 {
        smp_wmb();
        s->sequence++;
@@ -258,12 +398,12 @@ static inline void raw_write_seqcount_end(seqcount_t *s)
 }
 
 /**
- * raw_write_seqcount_barrier - do a seq write barrier
- * @s: pointer to seqcount_t
+ * raw_write_seqcount_barrier() - do a seq write barrier
+ * @s: Pointer to &typedef seqcount_t or any of the seqcount_locktype_t variants
  *
  * This can be used to provide an ordering guarantee instead of the
  * usual consistency guarantee. It is one wmb cheaper, because we can
- * collapse the two back-to-back wmb()s.
+ * collapse the two back-to-back wmb()s::
  *
  * Note that writes surrounding the barrier should be declared atomic (e.g.
  * via WRITE_ONCE): a) to ensure the writes become visible to other threads
@@ -298,7 +438,9 @@ static inline void raw_write_seqcount_end(seqcount_t *s)
  *              WRITE_ONCE(X, false);
  *      }
  */
-static inline void raw_write_seqcount_barrier(seqcount_t *s)
+#define raw_write_seqcount_barrier(s)  do_raw_write_seqcount_barrier(__to_seqcount_t(s))
+
+static inline void do_raw_write_seqcount_barrier(seqcount_t *s)
 {
        kcsan_nestable_atomic_begin();
        s->sequence++;
@@ -307,7 +449,24 @@ static inline void raw_write_seqcount_barrier(seqcount_t *s)
        kcsan_nestable_atomic_end();
 }
 
-static inline int raw_read_seqcount_latch(seqcount_t *s)
+/**
+ * raw_read_seqcount_latch() - pick even or odd seqcount latch data copy
+ * @s: pointer to &typedef seqcount_t or any of the seqcount_locktype_t variants
+ *
+ * Use seqcount latching to switch between two storage places with
+ * sequence protection to allow interruptible, preemptible, writer
+ * sections.
+ *
+ * Check raw_write_seqcount_latch() for more details and a full reader
+ * and writer usage example.
+ *
+ * Return: sequence counter. Use the lowest bit as index for picking
+ * which data copy to read. Full counter must then be checked with
+ * read_seqcount_retry().
+ */
+#define raw_read_seqcount_latch(s)     do_raw_read_seqcount_latch(__to_seqcount_t(s))
+
+static inline int do_raw_read_seqcount_latch(seqcount_t *s)
 {
        /* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */
        int seq = READ_ONCE(s->sequence); /* ^^^ */
@@ -315,8 +474,8 @@ static inline int raw_read_seqcount_latch(seqcount_t *s)
 }
 
 /**
- * raw_write_seqcount_latch - redirect readers to even/odd copy
- * @s: pointer to seqcount_t
+ * raw_write_seqcount_latch() - redirect readers to even/odd copy
+ * @s: pointer to &typedef seqcount_t or any of the seqcount_locktype_t variants
  *
  * The latch technique is a multiversion concurrency control method that allows
  * queries during non-atomic modifications. If you can guarantee queries never
@@ -332,101 +491,164 @@ static inline int raw_read_seqcount_latch(seqcount_t *s)
  * Very simply put: we first modify one copy and then the other. This ensures
  * there is always one copy in a stable state, ready to give us an answer.
  *
- * The basic form is a data structure like:
+ * The basic form is a data structure like::
  *
- * struct latch_struct {
- *     seqcount_t              seq;
- *     struct data_struct      data[2];
- * };
+ *     struct latch_struct {
+ *             seqcount_t              seq;
+ *             struct data_struct      data[2];
+ *     };
  *
  * Where a modification, which is assumed to be externally serialized, does the
- * following:
+ * following::
  *
- * void latch_modify(struct latch_struct *latch, ...)
- * {
- *     smp_wmb();      <- Ensure that the last data[1] update is visible
- *     latch->seq++;
- *     smp_wmb();      <- Ensure that the seqcount update is visible
+ *     void latch_modify(struct latch_struct *latch, ...)
+ *     {
+ *             smp_wmb();      // Ensure that the last data[1] update is visible
+ *             latch->seq++;
+ *             smp_wmb();      // Ensure that the seqcount update is visible
  *
- *     modify(latch->data[0], ...);
+ *             modify(latch->data[0], ...);
  *
- *     smp_wmb();      <- Ensure that the data[0] update is visible
- *     latch->seq++;
- *     smp_wmb();      <- Ensure that the seqcount update is visible
+ *             smp_wmb();      // Ensure that the data[0] update is visible
+ *             latch->seq++;
+ *             smp_wmb();      // Ensure that the seqcount update is visible
  *
- *     modify(latch->data[1], ...);
- * }
+ *             modify(latch->data[1], ...);
+ *     }
  *
- * The query will have a form like:
+ * The query will have a form like::
  *
- * struct entry *latch_query(struct latch_struct *latch, ...)
- * {
- *     struct entry *entry;
- *     unsigned seq, idx;
+ *     struct entry *latch_query(struct latch_struct *latch, ...)
+ *     {
+ *             struct entry *entry;
+ *             unsigned seq, idx;
  *
- *     do {
- *             seq = raw_read_seqcount_latch(&latch->seq);
+ *             do {
+ *                     seq = raw_read_seqcount_latch(&latch->seq);
  *
- *             idx = seq & 0x01;
- *             entry = data_query(latch->data[idx], ...);
+ *                     idx = seq & 0x01;
+ *                     entry = data_query(latch->data[idx], ...);
  *
- *             smp_rmb();
- *     } while (seq != latch->seq);
+ *                     // read_seqcount_retry() includes necessary smp_rmb()
+ *             } while (read_seqcount_retry(&latch->seq, seq));
  *
- *     return entry;
- * }
+ *             return entry;
+ *     }
  *
  * So during the modification, queries are first redirected to data[1]. Then we
  * modify data[0]. When that is complete, we redirect queries back to data[0]
  * and we can modify data[1].
  *
- * NOTE: The non-requirement for atomic modifications does _NOT_ include
- *       the publishing of new entries in the case where data is a dynamic
- *       data structure.
+ * NOTE:
+ *
+ *     The non-requirement for atomic modifications does _NOT_ include
+ *     the publishing of new entries in the case where data is a dynamic
+ *     data structure.
  *
- *       An iteration might start in data[0] and get suspended long enough
- *       to miss an entire modification sequence, once it resumes it might
- *       observe the new entry.
+ *     An iteration might start in data[0] and get suspended long enough
+ *     to miss an entire modification sequence, once it resumes it might
+ *     observe the new entry.
  *
- * NOTE: When data is a dynamic data structure; one should use regular RCU
- *       patterns to manage the lifetimes of the objects within.
+ * NOTE:
+ *
+ *     When data is a dynamic data structure; one should use regular RCU
+ *     patterns to manage the lifetimes of the objects within.
  */
-static inline void raw_write_seqcount_latch(seqcount_t *s)
+#define raw_write_seqcount_latch(s)    do_raw_write_seqcount_latch(__to_seqcount_t(s))
+
+static inline void do_raw_write_seqcount_latch(seqcount_t *s)
 {
        smp_wmb();      /* prior stores before incrementing "sequence" */
        s->sequence++;
        smp_wmb();      /* increment "sequence" before following stores */
 }
 
+static inline void __write_seqcount_begin_nested(seqcount_t *s, int subclass)
+{
+       do_raw_write_seqcount_begin(s);
+       seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
+}
+
+#define write_seqcount_begin_nested(s, subclass)               \
+do {                                                                   \
+       __assert_associated_lock_held(s);                               \
+                                                                       \
+       if (__associated_lock_is_preemptible(s))                        \
+               preempt_disable();                                      \
+                                                                       \
+       do_write_seqcount_begin_nested(__to_seqcount_t(s), subclass);   \
+} while (0)
+
+static inline void do_write_seqcount_begin_nested(seqcount_t *s, int subclass)
+{
+//     lockdep_assert_preemption_disabled();
+       __write_seqcount_begin_nested(s, subclass);
+}
+
 /*
- * Sequence counter only version assumes that callers are using their
- * own mutexing.
+ * write_seqcount_begin() without the lockdep/non-preemptibility checks.
+ *
+ * Use for internal seqlock.h code where it's known that preemption is
+ * already disabled. For example, seqlock_t write side functions.
  */
-static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass)
+static inline void __write_seqcount_begin(seqcount_t *s)
 {
-       raw_write_seqcount_begin(s);
-       seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
+       __write_seqcount_begin_nested(s, 0);
 }
 
-static inline void write_seqcount_begin(seqcount_t *s)
+/**
+ * write_seqcount_begin() - start a seqcount write-side critical section
+ * @s: Pointer to &typedef seqcount_t or any of the seqcount_locktype_t variants
+ *
+ * write_seqcount_begin opens a write-side critical section of the given
+ * seqcount. Seqcount write-side critical sections must be externally
+ * serialized and non-preemptible.
+ */
+#define write_seqcount_begin(s)                                         \
+do {                                                                   \
+       __assert_associated_lock_held(s);                               \
+                                                                       \
+       if (__associated_lock_is_preemptible(s))                        \
+               preempt_disable();                                      \
+                                                                       \
+       do_write_seqcount_begin(__to_seqcount_t(s));                    \
+} while (0)
+
+static inline void do_write_seqcount_begin(seqcount_t *s)
 {
        write_seqcount_begin_nested(s, 0);
 }
 
-static inline void write_seqcount_end(seqcount_t *s)
+/**
+ * write_seqcount_end() - end a seqcount_t write-side critical section
+ * @s: Pointer to &typedef seqcount_t
+ *
+ * The write section must've been opened with write_seqcount_begin().
+ */
+#define write_seqcount_end(s)                                          \
+do {                                                                   \
+       do_write_seqcount_end(__to_seqcount_t(s));                      \
+                                                                       \
+       if (__associated_lock_is_preemptible(s))                        \
+               preempt_enable();                                       \
+} while (0)
+
+static inline void do_write_seqcount_end(seqcount_t *s)
 {
        seqcount_release(&s->dep_map, _RET_IP_);
        raw_write_seqcount_end(s);
 }
 
 /**
- * write_seqcount_invalidate - invalidate in-progress read-side seq operations
- * @s: pointer to seqcount_t
+ * write_seqcount_invalidate() - invalidate in-progress read-side seq operations
+ * @s: Pointer to &typedef seqcount_t or any of the seqcount_locktype_t variants
  *
  * After write_seqcount_invalidate, no read-side seq operations will complete
  * successfully and see data older than this.
  */
-static inline void write_seqcount_invalidate(seqcount_t *s)
+#define write_seqcount_invalidate(s)   do_write_seqcount_invalidate(__to_seqcount_t(s))
+
+static inline void do_write_seqcount_invalidate(seqcount_t *s)
 {
        smp_wmb();
        kcsan_nestable_atomic_begin();
@@ -434,42 +656,74 @@ static inline void write_seqcount_invalidate(seqcount_t *s)
        kcsan_nestable_atomic_end();
 }
 
+/*
+ * Sequential locks (seqlock_t)
+ *
+ * Sequence counters with an embedded spinlock for writer serialization
+ * and non-preemptibility.
+ *
+ * For more info, see:
+ *   - Comments on top of seqcount_t
+ *   - Documentation/locking/seqlock.rst
+ */
 typedef struct {
        struct seqcount seqcount;
        spinlock_t lock;
 } seqlock_t;
 
-/*
- * These macros triggered gcc-3.x compile-time problems.  We think these are
- * OK now.  Be cautious.
- */
 #define __SEQLOCK_UNLOCKED(lockname)                   \
        {                                               \
                .seqcount = SEQCNT_ZERO(lockname),      \
                .lock = __SPIN_LOCK_UNLOCKED(lockname)  \
        }
 
-#define seqlock_init(x)                                        \
+/**
+ * seqlock_init() - dynamic initializer for seqlock_t
+ * @sl: Pointer to the &typedef seqlock_t instance
+ */
+#define seqlock_init(sl)                               \
        do {                                            \
-               seqcount_init(&(x)->seqcount);          \
-               spin_lock_init(&(x)->lock);             \
+               seqcount_init(&(sl)->seqcount);         \
+               spin_lock_init(&(sl)->lock);            \
        } while (0)
 
-#define DEFINE_SEQLOCK(x) \
-               seqlock_t x = __SEQLOCK_UNLOCKED(x)
+/**
+ * DEFINE_SEQLOCK() - Define a statically-allocated seqlock_t
+ * @sl: Name of the &typedef seqlock_t instance
+ */
+#define DEFINE_SEQLOCK(sl) \
+               seqlock_t sl = __SEQLOCK_UNLOCKED(sl)
 
-/*
- * Read side functions for starting and finalizing a read side section.
+/**
+ * read_seqbegin() - start a seqlock_t read-side critical section
+ * @sl: Pointer to &typedef seqlock_t
+ *
+ * read_seqbegin opens a read side critical section of the given
+ * seqlock_t. Validity of the critical section is tested by checking
+ * read_seqretry().
+ *
+ * Return: count to be passed to read_seqretry()
  */
 static inline unsigned read_seqbegin(const seqlock_t *sl)
 {
-       unsigned ret = read_seqcount_begin(&sl->seqcount);
+       unsigned ret = do_read_seqcount_begin(&sl->seqcount);
 
        kcsan_atomic_next(0);  /* non-raw usage, assume closing read_seqretry() */
        kcsan_flat_atomic_begin();
        return ret;
 }
 
+/**
+ * read_seqretry() - end and validate a seqlock_t read side section
+ * @sl: Pointer to &typedef seqlock_t
+ * @start: count, from read_seqbegin()
+ *
+ * read_seqretry closes the given seqlock_t read side critical section,
+ * and checks its validity. If the read section was invalid, it must be
+ * ignored and retried.
+ *
+ * Return: 1 if a retry is required, 0 otherwise
+ */
 static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
 {
        /*
@@ -478,47 +732,94 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
         */
        kcsan_flat_atomic_end();
 
-       return read_seqcount_retry(&sl->seqcount, start);
+       return do_read_seqcount_retry(&sl->seqcount, start);
 }
 
-/*
- * Lock out other writers and update the count.
- * Acts like a normal spin_lock/unlock.
- * Don't need preempt_disable() because that is in the spin_lock already.
+/**
+ * write_seqlock() - start a seqlock_t write side critical section
+ * @sl: Pointer to &typedef seqlock_t
+ *
+ * write_seqlock opens a write side critical section of the given
+ * seqlock_t.  It also acquires the spinlock_t embedded inside the
+ * sequential lock. All the seqlock_t write side critical sections are
+ * thus automatically serialized and non-preemptible.
+ *
+ * Use the ``_irqsave`` and ``_bh`` variants instead if the read side
+ * can be invoked from a hardirq or softirq context.
+ *
+ * The opened write side section must be closed with write_sequnlock().
  */
 static inline void write_seqlock(seqlock_t *sl)
 {
        spin_lock(&sl->lock);
-       write_seqcount_begin(&sl->seqcount);
+       __write_seqcount_begin(&sl->seqcount);
 }
 
+/**
+ * write_sequnlock() - end a seqlock_t write side critical section
+ * @sl: Pointer to &typedef seqlock_t
+ *
+ * write_sequnlock closes the (serialized and non-preemptible) write
+ * side critical section of given seqlock_t.
+ */
 static inline void write_sequnlock(seqlock_t *sl)
 {
-       write_seqcount_end(&sl->seqcount);
+       do_write_seqcount_end(&sl->seqcount);
        spin_unlock(&sl->lock);
 }
 
+/**
+ * write_seqlock_bh() - start a softirqs-disabled seqlock_t write section
+ * @sl: Pointer to &typedef seqlock_t
+ *
+ * ``_bh`` variant of write_seqlock(). Use only if the read side section
+ * can be invoked from a softirq context.
+ *
+ * The opened write section must be closed with write_sequnlock_bh().
+ */
 static inline void write_seqlock_bh(seqlock_t *sl)
 {
        spin_lock_bh(&sl->lock);
-       write_seqcount_begin(&sl->seqcount);
+       __write_seqcount_begin(&sl->seqcount);
 }
 
+/**
+ * write_sequnlock_bh() - end a softirqs-disabled seqlock_t write section
+ * @sl: Pointer to &typedef seqlock_t
+ *
+ * write_sequnlock_bh closes the serialized, non-preemptible,
+ * softirqs-disabled, seqlock_t write side critical section opened with
+ * write_seqlock_bh().
+ */
 static inline void write_sequnlock_bh(seqlock_t *sl)
 {
-       write_seqcount_end(&sl->seqcount);
+       do_write_seqcount_end(&sl->seqcount);
        spin_unlock_bh(&sl->lock);
 }
 
+/**
+ * write_seqlock_irq() - start a non-interruptible seqlock_t write side section
+ * @sl: Pointer to &typedef seqlock_t
+ *
+ * This is the ``_irq`` variant of write_seqlock(). Use only if the read
+ * section of given seqlock_t can be invoked from a hardirq context.
+ */
 static inline void write_seqlock_irq(seqlock_t *sl)
 {
        spin_lock_irq(&sl->lock);
-       write_seqcount_begin(&sl->seqcount);
+       __write_seqcount_begin(&sl->seqcount);
 }
 
+/**
+ * write_sequnlock_irq() - end a non-interruptible seqlock_t write side section
+ * @sl: Pointer to &typedef seqlock_t
+ *
+ * ``_irq`` variant of write_sequnlock(). The write side section of
+ * given seqlock_t must've been opened with write_seqlock_irq().
+ */
 static inline void write_sequnlock_irq(seqlock_t *sl)
 {
-       write_seqcount_end(&sl->seqcount);
+       do_write_seqcount_end(&sl->seqcount);
        spin_unlock_irq(&sl->lock);
 }
 
@@ -527,44 +828,98 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
        unsigned long flags;
 
        spin_lock_irqsave(&sl->lock, flags);
-       write_seqcount_begin(&sl->seqcount);
+       __write_seqcount_begin(&sl->seqcount);
+
        return flags;
 }
 
+/**
+ * write_seqlock_irqsave() - start a non-interruptible seqlock_t write section
+ * @lock:  Pointer to &typedef seqlock_t
+ * @flags: Stack-allocated storage for saving caller's local interrupt
+ *         state, to be passed to write_sequnlock_irqrestore().
+ *
+ * ``_irqsave`` variant of write_seqlock(). Use if the read section of
+ * given seqlock_t can be invoked from a hardirq context.
+ *
+ * The opened write section must be closed with write_sequnlock_irqrestore().
+ */
 #define write_seqlock_irqsave(lock, flags)                             \
        do { flags = __write_seqlock_irqsave(lock); } while (0)
 
+/**
+ * write_sequnlock_irqrestore() - end non-interruptible seqlock_t write section
+ * @sl:    Pointer to &typedef seqlock_t
+ * @flags: Caller's saved interrupt state, from write_seqlock_irqsave()
+ *
+ * ``_irqrestore`` variant of write_sequnlock(). The write section of
+ * given seqlock_t must've been opened with write_seqlock_irqsave().
+ */
 static inline void
 write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
 {
-       write_seqcount_end(&sl->seqcount);
+       do_write_seqcount_end(&sl->seqcount);
        spin_unlock_irqrestore(&sl->lock, flags);
 }
 
-/*
- * A locking reader exclusively locks out other writers and locking readers,
- * but doesn't update the sequence number. Acts like a normal spin_lock/unlock.
- * Don't need preempt_disable() because that is in the spin_lock already.
+/**
+ * read_seqlock_excl() - begin a seqlock_t locking reader critical section
+ * @sl: Pointer to &typedef seqlock_t
+ *
+ * read_seqlock_excl opens a locking reader critical section for the
+ * given seqlock_t. A locking reader exclusively locks out other writers
+ * and other *locking* readers, but doesn't update the sequence number.
+ *
+ * Locking readers act like a normal spin_lock()/spin_unlock().
+ *
+ * The opened read side section must be closed with read_sequnlock_excl().
  */
 static inline void read_seqlock_excl(seqlock_t *sl)
 {
        spin_lock(&sl->lock);
 }
 
+/**
+ * read_sequnlock_excl() - end a seqlock_t locking reader critical section
+ * @sl: Pointer to &typedef seqlock_t
+ *
+ * read_sequnlock_excl closes the locking reader critical section opened
+ * with read_seqlock_excl().
+ */
 static inline void read_sequnlock_excl(seqlock_t *sl)
 {
        spin_unlock(&sl->lock);
 }
 
 /**
- * read_seqbegin_or_lock - begin a sequence number check or locking block
- * @lock: sequence lock
- * @seq : sequence number to be checked
+ * read_seqbegin_or_lock() - begin a seqlock_t lockless or locking reader
+ * @lock: Pointer to &typedef seqlock_t
+ * @seq : Marker and return parameter. If the passed value is even, the
+ * reader will become a *lockless* seqlock_t sequence counter reader as
+ * in read_seqbegin(). If the passed value is odd, the reader will
+ * become a fully locking reader, as in read_seqlock_excl().  In the
+ * first call to read_seqbegin_or_lock(), the caller **must** initialize
+ * and pass an even value to @seq so a lockless read is optimistically
+ * tried first.
+ *
+ * read_seqbegin_or_lock is an API designed to optimistically try a
+ * normal lockless seqlock_t read section first, as in read_seqbegin().
+ * If an odd counter is found, the normal lockless read trial has
+ * failed, and the next reader iteration transforms to a full seqlock_t
+ * locking reader as in read_seqlock_excl().
  *
- * First try it once optimistically without taking the lock. If that fails,
- * take the lock. The sequence number is also used as a marker for deciding
- * whether to be a reader (even) or writer (odd).
- * N.B. seq must be initialized to an even number to begin with.
+ * This is typically used to avoid starvation of lockless seqlock_t
+ * readers (too many retry loops) in the case of a sharp spike in write
+ * activity.
+ *
+ * The opened read section must be closed with done_seqretry().  Check
+ * Documentation/locking/seqlock.rst for template example code.
+ *
+ * Return: The encountered sequence counter value, returned through the
+ * @seq parameter, which is overloaded as a return parameter. The
+ * returned value must be checked with need_seqretry(). If the read
+ * section must be retried, the returned value must also be passed to
+ * the @seq parameter of the next read_seqbegin_or_lock() iteration.
  */
 static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
 {
@@ -574,32 +929,90 @@ static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
                read_seqlock_excl(lock);
 }
 
+/**
+ * need_seqretry() - validate seqlock_t "locking or lockless" reader section
+ * @lock: Pointer to &typedef seqlock_t
+ * @seq: count, from read_seqbegin_or_lock()
+ *
+ * need_seqretry checks if the seqlock_t read-side critical section
+ * started with read_seqbegin_or_lock() is valid. If it was not, the
+ * caller must retry the read-side section.
+ *
+ * Return: 1 if a retry is required, 0 otherwise
+ */
 static inline int need_seqretry(seqlock_t *lock, int seq)
 {
        return !(seq & 1) && read_seqretry(lock, seq);
 }
 
+/**
+ * done_seqretry() - end seqlock_t "locking or lockless" reader section
+ * @lock: Pointer to &typedef seqlock_t
+ * @seq: count, from read_seqbegin_or_lock()
+ *
+ * done_seqretry finishes the seqlock_t read side critical section
+ * started by read_seqbegin_or_lock(). The read section must've been
+ * already validated with need_seqretry().
+ */
 static inline void done_seqretry(seqlock_t *lock, int seq)
 {
        if (seq & 1)
                read_sequnlock_excl(lock);
 }
 
+/**
+ * read_seqlock_excl_bh() - start a locking reader seqlock_t section
+ *                         with softirqs disabled
+ * @sl: Pointer to &typedef seqlock_t
+ *
+ * ``_bh`` variant of read_seqlock_excl(). Use this variant if the
+ * seqlock_t write side section, *or other read sections*, can be
+ * invoked from a softirq context
+ *
+ * The opened section must be closed with read_sequnlock_excl_bh().
+ */
 static inline void read_seqlock_excl_bh(seqlock_t *sl)
 {
        spin_lock_bh(&sl->lock);
 }
 
+/**
+ * read_sequnlock_excl_bh() - stop a seqlock_t softirq-disabled locking
+ *                           reader section
+ * @sl: Pointer to &typedef seqlock_t
+ *
+ * ``_bh`` variant of read_sequnlock_excl(). The closed section must've
+ * been opened with read_seqlock_excl_bh().
+ */
 static inline void read_sequnlock_excl_bh(seqlock_t *sl)
 {
        spin_unlock_bh(&sl->lock);
 }
 
+/**
+ * read_seqlock_excl_irq() - start a non-interruptible seqlock_t locking
+ *                          reader section
+ * @sl: Pointer to &typedef seqlock_t
+ *
+ * ``_irq`` variant of read_seqlock_excl(). Use this only if the
+ * seqlock_t write side critical section, *or other read side sections*,
+ * can be invoked from a hardirq context.
+ *
+ * The opened read section must be closed with read_sequnlock_excl_irq().
+ */
 static inline void read_seqlock_excl_irq(seqlock_t *sl)
 {
        spin_lock_irq(&sl->lock);
 }
 
+/**
+ * read_sequnlock_excl_irq() - end an interrupts-disabled seqlock_t
+ *                             locking reader section
+ * @sl: Pointer to &typedef seqlock_t
+ *
+ * ``_irq`` variant of read_sequnlock_excl(). The closed section must've
+ * been opened with read_seqlock_excl_irq().
+ */
 static inline void read_sequnlock_excl_irq(seqlock_t *sl)
 {
        spin_unlock_irq(&sl->lock);
@@ -613,15 +1026,59 @@ static inline unsigned long __read_seqlock_excl_irqsave(seqlock_t *sl)
        return flags;
 }
 
+/**
+ * read_seqlock_excl_irqsave() - start a non-interruptible seqlock_t
+ *                              locking reader section
+ * @lock: Pointer to &typedef seqlock_t
+ * @flags: Stack-allocated storage for saving caller's local interrupt
+ *         state, to be passed to read_sequnlock_excl_irqrestore().
+ *
+ * ``_irqsave`` variant of read_seqlock_excl(). Use this only if the
+ * seqlock_t write side critical section, *or other read side sections*,
+ * can be invoked from a hardirq context.
+ *
+ * Opened section must be closed with read_sequnlock_excl_irqrestore().
+ */
 #define read_seqlock_excl_irqsave(lock, flags)                         \
        do { flags = __read_seqlock_excl_irqsave(lock); } while (0)
 
+/**
+ * read_sequnlock_excl_irqrestore() - end non-interruptible seqlock_t
+ *                                   locking reader section
+ * @sl: Pointer to &typedef seqlock_t
+ * @flags: Caller's saved interrupt state, from
+ *        read_seqlock_excl_irqsave()
+ *
+ * ``_irqrestore`` variant of read_sequnlock_excl(). The closed section
+ * must've been opened with read_seqlock_excl_irqsave().
+ */
 static inline void
 read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags)
 {
        spin_unlock_irqrestore(&sl->lock, flags);
 }
 
+/**
+ * read_seqbegin_or_lock_irqsave() - begin a seqlock_t lockless reader, or
+ *                                   a non-interruptible locking reader
+ * @lock: Pointer to &typedef seqlock_t
+ * @seq: Marker and return parameter. Check read_seqbegin_or_lock().
+ *
+ * This is the ``_irqsave`` variant of read_seqbegin_or_lock(). Use if
+ * the seqlock_t write side critical section, *or other read side sections*,
+ * can be invoked from hardirq context.
+ *
+ * The validity of the read section must be checked with need_seqretry().
+ * The opened section must be closed with done_seqretry_irqrestore().
+ *
+ * Return:
+ *
+ *   1. The saved local interrupts state in case of a locking reader, to be
+ *      passed to done_seqretry_irqrestore().
+ *
+ *   2. The encountered sequence counter value, returned through @seq which
+ *      is overloaded as a return parameter. Check read_seqbegin_or_lock().
+ */
 static inline unsigned long
 read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq)
 {
@@ -635,6 +1092,18 @@ read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq)
        return flags;
 }
 
+/**
+ * done_seqretry_irqrestore() - end a seqlock_t lockless reader, or a
+ *                             non-interruptible locking reader section
+ * @lock:  Pointer to &typedef seqlock_t
+ * @seq:   Count, from read_seqbegin_or_lock_irqsave()
+ * @flags: Caller's saved local interrupt state in case of a locking
+ *        reader, also from read_seqbegin_or_lock_irqsave()
+ *
+ * This is the ``_irqrestore`` variant of done_seqretry(). The read
+ * section must've been opened with read_seqbegin_or_lock_irqsave(), and
+ * validated with need_seqretry().
+ */
 static inline void
 done_seqretry_irqrestore(seqlock_t *lock, int seq, unsigned long flags)
 {
