On 7/21/20 7:08 AM, Nicholas Piggin wrote:
diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
index b752d34517b3..26d8766a1106 100644
--- a/arch/powerpc/include/asm/qspinlock.h
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -31,16 +31,57 @@ static inline void queued_spin_unlock(struct qspinlock *lock)
#else
  extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void queued_spin_lock_slowpath_queue(struct qspinlock *lock);
  #endif
static __always_inline void queued_spin_lock(struct qspinlock *lock)
  {
-       u32 val = 0;
-
-       if (likely(atomic_try_cmpxchg_lock(&lock->val, &val, _Q_LOCKED_VAL)))
+       atomic_t *a = &lock->val;
+       u32 val;
+
+again:
+       asm volatile(
+"1:\t"       PPC_LWARX(%0,0,%1,1) "     # queued_spin_lock                      \n"
+       : "=&r" (val)
+       : "r" (&a->counter)
+       : "memory");
+
+       if (likely(val == 0)) {
+               asm_volatile_goto(
+       "  stwcx.  %0,0,%1                                                 \n"
+       "  bne-    %l[again]                                               \n"
+       "\t"  PPC_ACQUIRE_BARRIER "                                              \n"
+               :
+               : "r"(_Q_LOCKED_VAL), "r" (&a->counter)
+               : "cr0", "memory"
+               : again );
                return;
-
-       queued_spin_lock_slowpath(lock, val);
+       }
+
+       if (likely(val == _Q_LOCKED_VAL)) {
+               asm_volatile_goto(
+       "  stwcx.  %0,0,%1                                                 \n"
+       "  bne-    %l[again]                                               \n"
+               :
+               : "r"(_Q_LOCKED_VAL | _Q_PENDING_VAL), "r" (&a->counter)
+               : "cr0", "memory"
+               : again );
+
+               atomic_cond_read_acquire(a, !(VAL & _Q_LOCKED_MASK));
+//             clear_pending_set_locked(lock);
+               WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL);
+//             lockevent_inc(lock_pending);
+               return;
+       }
+
+       if (val == _Q_PENDING_VAL) {
+               int cnt = _Q_PENDING_LOOPS;
+               val = atomic_cond_read_relaxed(a,
+                                              (VAL != _Q_PENDING_VAL) || !cnt--);
+               if (!(val & ~_Q_LOCKED_MASK))
+                       goto again;
+        }
+       queued_spin_lock_slowpath_queue(lock);
  }
  #define queued_spin_lock queued_spin_lock

I am fine with the arch code override some part of the generic code.


diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index b9515fcc9b29..ebcc6f5d99d5 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -287,10 +287,14 @@ static __always_inline u32  __pv_wait_head_or_lock(struct qspinlock *lock,
#ifdef CONFIG_PARAVIRT_SPINLOCKS
  #define queued_spin_lock_slowpath     native_queued_spin_lock_slowpath
+#define queued_spin_lock_slowpath_queue        native_queued_spin_lock_slowpath_queue
  #endif
#endif /* _GEN_PV_LOCK_SLOWPATH */

+void queued_spin_lock_slowpath_queue(struct qspinlock *lock);
+static void __queued_spin_lock_slowpath_queue(struct qspinlock *lock);
+
  /**
   * queued_spin_lock_slowpath - acquire the queued spinlock
   * @lock: Pointer to queued spinlock structure
@@ -314,12 +318,6 @@ static __always_inline u32  __pv_wait_head_or_lock(struct qspinlock *lock,
   */
  void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
  {
-       struct mcs_spinlock *prev, *next, *node;
-       u32 old, tail;
-       int idx;
-
-       BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
-
        if (pv_enabled())
                goto pv_queue;
@@ -397,6 +395,26 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
  queue:
        lockevent_inc(lock_slowpath);
  pv_queue:
+       __queued_spin_lock_slowpath_queue(lock);
+}
+EXPORT_SYMBOL(queued_spin_lock_slowpath);
+
+void queued_spin_lock_slowpath_queue(struct qspinlock *lock)
+{
+       lockevent_inc(lock_slowpath);
+       __queued_spin_lock_slowpath_queue(lock);
+}
+EXPORT_SYMBOL(queued_spin_lock_slowpath_queue);
+
+static void __queued_spin_lock_slowpath_queue(struct qspinlock *lock)
+{
+       struct mcs_spinlock *prev, *next, *node;
+       u32 old, tail;
+       u32 val;
+       int idx;
+
+       BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
+
        node = this_cpu_ptr(&qnodes[0].mcs);
        idx = node->count++;
        tail = encode_tail(smp_processor_id(), idx);
@@ -559,7 +577,6 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
         */
        __this_cpu_dec(qnodes[0].mcs.count);
  }
-EXPORT_SYMBOL(queued_spin_lock_slowpath);
/*
   * Generate the paravirt code for queued_spin_unlock_slowpath().

I would prefer to extract out the pending bit handling code out into a separate helper function which can be overridden by the arch code instead of breaking the slowpath into 2 pieces.

Cheers,
Longman

Reply via email to