This patch applies on top of: git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git locking/rfc
---
For qspinlocks on ARM64, we would like to use WFE instead of purely
spinning. Qspinlocks internally have lock contenders spin on an MCS
lock.

Update arch_mcs_spin_lock_contended() such that it uses the new
smp_cond_load_acquire() so that ARM64 can also override this spin
loop with its own implementation using WFE.

On x86, this can also be cheaper than spinning on smp_load_acquire().

Signed-off-by: Jason Low <[email protected]>
---
v2 -> v3:
- Add additional comments about the use of smp_cond_load_acquire().

v1 -> v2:
- Pass l instead of &l to smp_cond_load_acquire() since l is already a
  pointer to the lock variable.

 kernel/locking/mcs_spinlock.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
index c835270..c19ce41 100644
--- a/kernel/locking/mcs_spinlock.h
+++ b/kernel/locking/mcs_spinlock.h
@@ -22,13 +22,15 @@ struct mcs_spinlock {
 
 #ifndef arch_mcs_spin_lock_contended
 /*
- * Using smp_load_acquire() provides a memory barrier that ensures
- * subsequent operations happen after the lock is acquired.
+ * Using smp_cond_load_acquire() provides the acquire semantics
+ * required so that subsequent operations happen after the
+ * lock is acquired. Additionally, some architectures such as
+ * ARM64 would like to do spin-waiting instead of purely
+ * spinning, and smp_cond_load_acquire() provides that behavior.
  */
 #define arch_mcs_spin_lock_contended(l)                         \
 do {                                                            \
-       while (!(smp_load_acquire(l)))                           \
-               cpu_relax_lowlatency();                          \
+       smp_cond_load_acquire(l, VAL);                           \
 } while (0)
 #endif
 
-- 
2.1.4
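
For readers who have not yet seen the new primitive, below is a rough
sketch of the generic smp_cond_load_acquire() fallback (modeled on the
asm-generic version in the series this applies on top of; see
include/asm-generic/barrier.h there for the authoritative definition).
It re-loads the value, evaluates the caller's condition on VAL, relaxes
the CPU between iterations, and issues the acquire barrier once the
condition holds; architectures such as ARM64 are free to override it
with a WFE-based wait:

        /*
         * Sketch of the generic fallback, not the authoritative copy;
         * arch code (e.g. ARM64) may provide its own definition.
         */
        #define smp_cond_load_acquire(ptr, cond_expr) ({        \
                typeof(ptr) __PTR = (ptr);                      \
                typeof(*ptr) VAL;                               \
                for (;;) {                                      \
                        VAL = READ_ONCE(*__PTR);                \
                        if (cond_expr)                          \
                                break;                          \
                        cpu_relax();                            \
                }                                               \
                smp_acquire__after_ctrl_dep();                  \
                VAL;                                            \
        })

With the hunk above, arch_mcs_spin_lock_contended(l) therefore re-reads
*l until it becomes non-zero (the VAL condition) and only then proceeds
past the acquire barrier, matching the ordering guarantees of the old
smp_load_acquire() loop while letting the architecture choose how to
wait.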

