On Tue, Apr 30, 2019 at 03:28:11PM +0200, Peter Zijlstra wrote:

> Yeah, but AFAIK fs freezing code has a history of doing exactly that..
> This is just the latest incarnation here.
> 
> So the immediate problem here is that the task doing thaw isn't the same
> that did freeze, right? The thing is, I'm not seeing how that isn't a
> problem with upstream either.
> 
> The freeze code seems to do: percpu_down_write() for the various states,
> and then frobs lockdep state.
> 
> Thaw then does the reverse, frobs lockdep and then does: percpu_up_write().
> 
> percpu_down_write() directly relies on down_write(), and
> percpu_up_write() on up_write(). And note how __up_write() has:
> 
>       DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem);
> 
> So why isn't this same code coming unstuck in mainline?

Anyway; I cobbled together the below. Oleg, could you have a look, I'm
sure I messed it up.

---
 fs/super.c                    | 31 ++----------------
 include/linux/fs.h            |  4 +--
 include/linux/percpu-rwsem.h  | 25 ++++++--------
 kernel/locking/percpu-rwsem.c | 76 ++++++++++++++++++++++++++++++++-----------
 4 files changed, 71 insertions(+), 65 deletions(-)

diff --git a/fs/super.c b/fs/super.c
index 583a0124bc39..bf9c54d05edb 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1629,30 +1629,7 @@ EXPORT_SYMBOL(__sb_start_write);
  */
 static void sb_wait_write(struct super_block *sb, int level)
 {
-       percpu_down_write(sb->s_writers.rw_sem + level-1);
-}
-
-/*
- * We are going to return to userspace and forget about these locks, the
- * ownership goes to the caller of thaw_super() which does unlock().
- */
-static void lockdep_sb_freeze_release(struct super_block *sb)
-{
-       int level;
-
-       for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
-               percpu_rwsem_release(sb->s_writers.rw_sem + level, 0, 
_THIS_IP_);
-}
-
-/*
- * Tell lockdep we are holding these locks before we call ->unfreeze_fs(sb).
- */
-static void lockdep_sb_freeze_acquire(struct super_block *sb)
-{
-       int level;
-
-       for (level = 0; level < SB_FREEZE_LEVELS; ++level)
-               percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, 
_THIS_IP_);
+       percpu_down_write_non_owner(sb->s_writers.rw_sem + level-1);
 }
 
 static void sb_freeze_unlock(struct super_block *sb)
@@ -1660,7 +1637,7 @@ static void sb_freeze_unlock(struct super_block *sb)
        int level;
 
        for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
-               percpu_up_write(sb->s_writers.rw_sem + level);
+               percpu_up_write_non_owner(sb->s_writers.rw_sem + level);
 }
 
 /**
@@ -1753,7 +1730,6 @@ int freeze_super(struct super_block *sb)
         * when frozen is set to SB_FREEZE_COMPLETE, and for thaw_super().
         */
        sb->s_writers.frozen = SB_FREEZE_COMPLETE;
-       lockdep_sb_freeze_release(sb);
        up_write(&sb->s_umount);
        return 0;
 }
@@ -1779,14 +1755,11 @@ static int thaw_super_locked(struct super_block *sb)
                goto out;
        }
 
-       lockdep_sb_freeze_acquire(sb);
-
        if (sb->s_op->unfreeze_fs) {
                error = sb->s_op->unfreeze_fs(sb);
                if (error) {
                        printk(KERN_ERR
                                "VFS:Filesystem thaw failed\n");
-                       lockdep_sb_freeze_release(sb);
                        up_write(&sb->s_umount);
                        return error;
                }
diff --git a/include/linux/fs.h b/include/linux/fs.h
index dd28e7679089..3b61740b90d7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1557,9 +1557,9 @@ void __sb_end_write(struct super_block *sb, int level);
 int __sb_start_write(struct super_block *sb, int level, bool wait);
 
 #define __sb_writers_acquired(sb, lev) \
-       percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
+       percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], _THIS_IP_)
 #define __sb_writers_release(sb, lev)  \
-       percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
+       percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], _THIS_IP_)
 
 /**
  * sb_end_write - drop write access to a superblock
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index 03cb4b6f842e..e0c02d0f82a6 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -5,15 +5,15 @@
 #include <linux/atomic.h>
 #include <linux/rwsem.h>
 #include <linux/percpu.h>
-#include <linux/rcuwait.h>
+#include <linux/wait.h>
 #include <linux/rcu_sync.h>
 #include <linux/lockdep.h>
 
 struct percpu_rw_semaphore {
        struct rcu_sync         rss;
        unsigned int __percpu   *read_count;
-       struct rw_semaphore     rw_sem; /* slowpath */
-       struct rcuwait          writer; /* blocked writer */
+       struct rw_semaphore     rw_sem;
+       wait_queue_head_t       writer;
        int                     readers_block;
 };
 
@@ -23,7 +23,7 @@ static struct percpu_rw_semaphore name = {                    
        \
        .rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC),        \
        .read_count = &__percpu_rwsem_rc_##name,                        \
        .rw_sem = __RWSEM_INITIALIZER(name.rw_sem),                     \
-       .writer = __RCUWAIT_INITIALIZER(name.writer),                   \
+       .writer = __WAIT_QUEUE_HEAD_INITIALIZER(name.writer),           \
 }
 
 extern int __percpu_down_read(struct percpu_rw_semaphore *, int);
@@ -95,6 +95,9 @@ static inline void percpu_up_read(struct percpu_rw_semaphore 
*sem)
 extern void percpu_down_write(struct percpu_rw_semaphore *);
 extern void percpu_up_write(struct percpu_rw_semaphore *);
 
+extern void percpu_down_write_non_owner(struct percpu_rw_semaphore *);
+extern void percpu_up_write_non_owner(struct percpu_rw_semaphore *);
+
 extern int __percpu_init_rwsem(struct percpu_rw_semaphore *,
                                const char *, struct lock_class_key *);
 
@@ -112,23 +115,15 @@ extern void percpu_free_rwsem(struct percpu_rw_semaphore 
*);
        lockdep_assert_held(&(sem)->rw_sem)
 
 static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem,
-                                       bool read, unsigned long ip)
+                                       unsigned long ip)
 {
        lock_release(&sem->rw_sem.dep_map, 1, ip);
-#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
-       if (!read)
-               sem->rw_sem.owner = RWSEM_OWNER_UNKNOWN;
-#endif
 }
 
 static inline void percpu_rwsem_acquire(struct percpu_rw_semaphore *sem,
-                                       bool read, unsigned long ip)
+                                       unsigned long ip)
 {
-       lock_acquire(&sem->rw_sem.dep_map, 0, 1, read, 1, NULL, ip);
-#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
-       if (!read)
-               sem->rw_sem.owner = current;
-#endif
+       lock_acquire(&sem->rw_sem.dep_map, 0, 1, 1, 1, NULL, ip);
 }
 
 #endif
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index f17dad99eec8..a51fd2a9ee90 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -1,6 +1,7 @@
 #include <linux/atomic.h>
 #include <linux/rwsem.h>
 #include <linux/percpu.h>
+#include <linux/wait.h>
 #include <linux/lockdep.h>
 #include <linux/percpu-rwsem.h>
 #include <linux/rcupdate.h>
@@ -19,7 +20,7 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
        /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
        rcu_sync_init(&sem->rss, RCU_SCHED_SYNC);
        __init_rwsem(&sem->rw_sem, name, rwsem_key);
-       rcuwait_init(&sem->writer);
+       init_waitqueue_head(&sem->writer);
        sem->readers_block = 0;
        return 0;
 }
@@ -40,6 +41,40 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *sem)
 }
 EXPORT_SYMBOL_GPL(percpu_free_rwsem);
 
+static void readers_block(struct percpu_rw_semaphore *sem)
+{
+       wait_event_cmd(sem->writer, !sem->readers_block,
+                      __up_read(&sem->rw_sem), __down_read(&sem->rw_sem));
+}
+
+static void block_readers(struct percpu_rw_semaphore *sem)
+{
+       wait_event_exclusive_cmd(sem->writer, !sem->readers_block,
+                                __up_write(&sem->rw_sem),
+                                __down_write(&sem->rw_sem));
+       /*
+        * Notify new readers to block; up until now, and thus throughout the
+        * longish rcu_sync_enter() above, new readers could still come in.
+        */
+       WRITE_ONCE(sem->readers_block, 1);
+}
+
+static void unblock_readers(struct percpu_rw_semaphore *sem)
+{
+       /*
+        * Signal the writer is done, no fast path yet.
+        *
+        * One reason that we cannot just immediately flip to readers_fast is
+        * that new readers might fail to see the results of this writer's
+        * critical section.
+        *
+        * Therefore we force it through the slow path which guarantees an
+        * acquire and thereby guarantees the critical section's consistency.
+        */
+       smp_store_release(&sem->readers_block, 0);
+       wake_up(&sem->writer);
+}
+
 int __percpu_down_read(struct percpu_rw_semaphore *sem, int try)
 {
        /*
@@ -85,6 +120,9 @@ int __percpu_down_read(struct percpu_rw_semaphore *sem, int 
try)
         * Avoid lockdep for the down/up_read() we already have them.
         */
        __down_read(&sem->rw_sem);
+
+       readers_block(sem);
+
        this_cpu_inc(*sem->read_count);
        __up_read(&sem->rw_sem);
 
@@ -104,7 +142,7 @@ void __percpu_up_read(struct percpu_rw_semaphore *sem)
        __this_cpu_dec(*sem->read_count);
 
        /* Prod writer to recheck readers_active */
-       rcuwait_wake_up(&sem->writer);
+       wake_up(&sem->writer);
 }
 EXPORT_SYMBOL_GPL(__percpu_up_read);
 
@@ -146,11 +184,7 @@ void percpu_down_write(struct percpu_rw_semaphore *sem)
 
        down_write(&sem->rw_sem);
 
-       /*
-        * Notify new readers to block; up until now, and thus throughout the
-        * longish rcu_sync_enter() above, new readers could still come in.
-        */
-       WRITE_ONCE(sem->readers_block, 1);
+       block_readers(sem);
 
        smp_mb(); /* D matches A */
 
@@ -161,23 +195,13 @@ void percpu_down_write(struct percpu_rw_semaphore *sem)
         */
 
        /* Wait for all now active readers to complete. */
-       rcuwait_wait_event(&sem->writer, readers_active_check(sem));
+       wait_event(sem->writer, readers_active_check(sem));
 }
 EXPORT_SYMBOL_GPL(percpu_down_write);
 
 void percpu_up_write(struct percpu_rw_semaphore *sem)
 {
-       /*
-        * Signal the writer is done, no fast path yet.
-        *
-        * One reason that we cannot just immediately flip to readers_fast is
-        * that new readers might fail to see the results of this writer's
-        * critical section.
-        *
-        * Therefore we force it through the slow path which guarantees an
-        * acquire and thereby guarantees the critical section's consistency.
-        */
-       smp_store_release(&sem->readers_block, 0);
+       unblock_readers(sem);
 
        /*
         * Release the write lock, this will allow readers back in the game.
@@ -191,4 +215,18 @@ void percpu_up_write(struct percpu_rw_semaphore *sem)
         */
        rcu_sync_exit(&sem->rss);
 }
+EXPORT_SYMBOL_GPL(percpu_up_write_non_owner);
+
+void percpu_down_write_non_owner(struct percpu_rw_semaphore *sem)
+{
+       percpu_down_write(sem);
+       up_write(&sem->rw_sem);
+}
+EXPORT_SYMBOL_GPL(percpu_down_write_non_owner);
+
+void percpu_up_write_non_owner(struct percpu_rw_semaphore *sem)
+{
+       down_write(&sem->rw_sem);
+       percpu_up_write(sem);
+}
 EXPORT_SYMBOL_GPL(percpu_up_write);

Reply via email to