On Thu, Jul 14, 2016 at 08:36:09PM +0200, Peter Zijlstra wrote:
> On Thu, Jul 14, 2016 at 08:09:52PM +0200, Oleg Nesterov wrote:
> > On 07/14, Peter Zijlstra wrote:
> > >
> > > The below is a compile tested only first draft so far. I'll go give it
> > > some runtime next.
> >
> > So I will wait for the new version, but at first glance this matches the
> > code I already reviewed in the past (at least, tried hard to review ;)
> > and it looks correct.
> >
> > Just a couple of almost cosmetic nits, feel free to ignore.
>
> Drat, I just mailed out the patches.. I can do a second version later.
How about so on top of what I sent?

--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -13,11 +13,10 @@ struct percpu_rw_semaphore {
 	unsigned int __percpu	*read_count;
 	struct rw_semaphore	rw_sem;
 	wait_queue_head_t	writer;
-	int			state;
+	int			readers_block;
 };
 
-extern void __percpu_down_read(struct percpu_rw_semaphore *);
-extern int __percpu_down_read_trylock(struct percpu_rw_semaphore *);
+extern int __percpu_down_read(struct percpu_rw_semaphore *, int);
 extern void __percpu_up_read(struct percpu_rw_semaphore *);
 
 static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
@@ -37,7 +36,7 @@ static inline void percpu_down_read(stru
 	 */
 	__this_cpu_inc(*sem->read_count);
 	if (unlikely(!rcu_sync_is_idle(&sem->rss)))
-		__percpu_down_read(sem); /* Unconditional memory barrier */
+		__percpu_down_read(sem, false); /* Unconditional memory barrier */
 	preempt_enable();
 	/*
 	 * The barrier() from preempt_enable() prevents the compiler from
@@ -55,7 +54,7 @@ static inline int percpu_down_read_trylo
 	 */
 	__this_cpu_inc(*sem->read_count);
 	if (unlikely(!rcu_sync_is_idle(&sem->rss)))
-		ret = __percpu_down_read_trylock(sem);
+		ret = __percpu_down_read(sem, true); /* Unconditional memory barrier */
 	preempt_enable();
 	/*
 	 * The barrier() from preempt_enable() prevents the compiler from
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -8,8 +8,6 @@
 #include <linux/sched.h>
 #include <linux/errno.h>
 
-enum { readers_slow, readers_block };
-
 int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
 			const char *name, struct lock_class_key *rwsem_key)
 {
@@ -21,7 +19,7 @@ int __percpu_init_rwsem(struct percpu_rw
 	rcu_sync_init(&sem->rss, RCU_SCHED_SYNC);
 	__init_rwsem(&sem->rw_sem, name, rwsem_key);
 	init_waitqueue_head(&sem->writer);
-	sem->state = readers_slow;
+	sem->readers_block = 0;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(__percpu_init_rwsem);
@@ -41,21 +39,21 @@ void percpu_free_rwsem(struct percpu_rw_
 }
 EXPORT_SYMBOL_GPL(percpu_free_rwsem);
 
-void __percpu_down_read(struct percpu_rw_semaphore *sem)
+int __percpu_down_read(struct percpu_rw_semaphore *sem, int try)
 {
 	/*
 	 * Due to having preemption disabled the decrement happens on
 	 * the same CPU as the increment, avoiding the
 	 * increment-on-one-CPU-and-decrement-on-another problem.
 	 *
-	 * And yes, if the reader misses the writer's assignment of
-	 * readers_block to sem->state, then the writer is
-	 * guaranteed to see the reader's increment. Conversely, any
-	 * readers that increment their sem->read_count after the
-	 * writer looks are guaranteed to see the readers_block value,
+	 * If the reader misses the writer's assignment of readers_block, then
+	 * the writer is guaranteed to see the reader's increment.
+	 *
+	 * Conversely, any readers that increment their sem->read_count after
+	 * the writer looks are guaranteed to see the readers_block value,
 	 * which in turn means that they are guaranteed to immediately
-	 * decrement their sem->read_count, so that it doesn't matter
-	 * that the writer missed them.
+	 * decrement their sem->read_count, so that it doesn't matter that the
+	 * writer missed them.
 	 */
 	smp_mb(); /* A matches D */
 
@@ -64,8 +62,8 @@ void __percpu_down_read(struct percpu_rw
 	 * If !readers_block the critical section starts here, matched by the
 	 * release in percpu_up_write().
 	 */
-	if (likely(smp_load_acquire(&sem->state) != readers_block))
-		return;
+	if (likely(!smp_load_acquire(&sem->readers_block)))
+		return 1;
 
 	/*
 	 * Per the above comment; we still have preemption disabled and
@@ -73,6 +71,9 @@ void __percpu_down_read(struct percpu_rw
 	 */
 	__percpu_up_read(sem);
 
+	if (try)
+		return 0;
+
 	/*
 	 * We either call schedule() in the wait, or we'll fall through
 	 * and reschedule on the preempt_enable() in percpu_down_read().
@@ -87,21 +88,10 @@ void __percpu_down_read(struct percpu_rw
 
 	__up_read(&sem->rw_sem);
 	preempt_disable();
+	return 1;
 }
 EXPORT_SYMBOL_GPL(__percpu_down_read);
 
-int __percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
-{
-	smp_mb(); /* A matches D */
-
-	if (likely(smp_load_acquire(&sem->state) != readers_block))
-		return 1;
-
-	__percpu_up_read(sem);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(__percpu_down_read_trylock);
-
 void __percpu_up_read(struct percpu_rw_semaphore *sem)
 {
 	smp_mb(); /* B matches C */
@@ -150,23 +140,23 @@ static bool readers_active_check(struct
 
 void percpu_down_write(struct percpu_rw_semaphore *sem)
 {
-	down_write(&sem->rw_sem);
-
 	/* Notify readers to take the slow path. */
 	rcu_sync_enter(&sem->rss);
 
+	down_write(&sem->rw_sem);
+
 	/*
 	 * Notify new readers to block; up until now, and thus throughout the
 	 * longish rcu_sync_enter() above, new readers could still come in.
 	 */
-	WRITE_ONCE(sem->state, readers_block);
+	WRITE_ONCE(sem->readers_block, 1);
 
 	smp_mb(); /* D matches A */
 
 	/*
-	 * If they don't see our writer of readers_block to sem->state,
-	 * then we are guaranteed to see their sem->read_count increment, and
-	 * therefore will wait for them.
+	 * If they don't see our writer of readers_block, then we are
+	 * guaranteed to see their sem->read_count increment, and therefore
+	 * will wait for them.
 	 */
 
 	/* Wait for all now active readers to complete. */
@@ -186,7 +176,7 @@ void percpu_up_write(struct percpu_rw_se
 	 * Therefore we force it through the slow path which guarantees an
	 * acquire and thereby guarantees the critical section's consistency.
 	 */
-	smp_store_release(&sem->state, readers_slow);
+	smp_store_release(&sem->readers_block, 0);
 
 	/*
 	 * Release the write lock, this will allow readers back in the game.
@@ -194,9 +184,9 @@ void percpu_up_write(struct percpu_rw_se
 	up_write(&sem->rw_sem);
 
 	/*
-	 * Once this completes (at least one RCU grace period hence) the reader
-	 * fast path will be available again. Safe to use outside the exclusive
-	 * write lock because its counting.
+	 * Once this completes (at least one RCU-sched grace period hence) the
+	 * reader fast path will be available again. Safe to use outside the
+	 * exclusive write lock because its counting.
 	 */
 	rcu_sync_exit(&sem->rss);
 }
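
Not part of the patch, but in case it helps review: a minimal, untested
sketch of how a caller uses the interface after this change. The
mode_sem / mode / mode_read() / mode_write() names are made up for the
example; percpu_init_rwsem(), percpu_down_read(),
percpu_down_read_trylock() and percpu_down_write() are the existing
entry points.

#include <linux/percpu-rwsem.h>

static struct percpu_rw_semaphore mode_sem;	/* hypothetical lock */
static int mode;				/* hypothetical protected data */

static int mode_setup(void)
{
	/* Allocates sem->read_count and starts with readers_block == 0. */
	return percpu_init_rwsem(&mode_sem);
}

static int mode_read(void)
{
	int m;

	/*
	 * Fast path is a preempt-disabled __this_cpu_inc(); only while a
	 * writer is (becoming) active does this fall into
	 * __percpu_down_read() and possibly sleep.
	 */
	percpu_down_read(&mode_sem);
	m = mode;
	percpu_up_read(&mode_sem);

	return m;
}

static void mode_write(int m)
{
	/* Blocks new readers, waits out active ones, then is exclusive. */
	percpu_down_write(&mode_sem);
	mode = m;
	percpu_up_write(&mode_sem);
}

With the combined __percpu_down_read(sem, try) above, the trylock
variant simply undoes its sem->read_count increment and returns 0 when
it observes readers_block, instead of sleeping like the blocking path.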