From: Peter Zijlstra <pet...@infradead.org>

Because the qspinlock needs to touch a second cacheline (the per-cpu MCS
node array) once it is contended, add a pending bit and allow a single
in-word spinner before we punt to that second cacheline.

Signed-off-by: Peter Zijlstra <pet...@infradead.org>
Signed-off-by: Waiman Long <waiman.l...@hp.com>
---
 include/asm-generic/qspinlock_types.h |   12 +++-
 kernel/locking/qspinlock.c            |  121 +++++++++++++++++++++++++++------
 2 files changed, 110 insertions(+), 23 deletions(-)

diff --git a/include/asm-generic/qspinlock_types.h b/include/asm-generic/qspinlock_types.h
index f66f845..bd25081 100644
--- a/include/asm-generic/qspinlock_types.h
+++ b/include/asm-generic/qspinlock_types.h
@@ -39,8 +39,9 @@ typedef struct qspinlock {
  * Bitfields in the atomic value:
  *
  *  0- 7: locked byte
- *  8- 9: tail index
- * 10-31: tail cpu (+1)
+ *     8: pending
+ *  9-10: tail index
+ * 11-31: tail cpu (+1)
  */
 #define        _Q_SET_MASK(type)       (((1U << _Q_ ## type ## _BITS) - 1)\
                                      << _Q_ ## type ## _OFFSET)
@@ -48,7 +49,11 @@ typedef struct qspinlock {
 #define _Q_LOCKED_BITS         8
 #define _Q_LOCKED_MASK         _Q_SET_MASK(LOCKED)
 
-#define _Q_TAIL_IDX_OFFSET     (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS)
+#define _Q_PENDING_OFFSET      (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS)
+#define _Q_PENDING_BITS                1
+#define _Q_PENDING_MASK                _Q_SET_MASK(PENDING)
+
+#define _Q_TAIL_IDX_OFFSET     (_Q_PENDING_OFFSET + _Q_PENDING_BITS)
 #define _Q_TAIL_IDX_BITS       2
 #define _Q_TAIL_IDX_MASK       _Q_SET_MASK(TAIL_IDX)
 
@@ -57,5 +62,6 @@ typedef struct qspinlock {
 #define _Q_TAIL_CPU_MASK       _Q_SET_MASK(TAIL_CPU)
 
 #define _Q_LOCKED_VAL          (1U << _Q_LOCKED_OFFSET)
+#define _Q_PENDING_VAL         (1U << _Q_PENDING_OFFSET)
 
 #endif /* __ASM_GENERIC_QSPINLOCK_TYPES_H */
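
For illustration only (not part of this patch): with the layout above, a
lock word decomposes into its four fields as below. The helper is
hypothetical.

	/* Hypothetical helper, for illustration -- not in this patch. */
	static inline void __q_decode(u32 val, u32 *cpu, u32 *idx,
				      u32 *pending, u32 *locked)
	{
		*locked  = val & _Q_LOCKED_MASK;
		*pending = (val & _Q_PENDING_MASK) >> _Q_PENDING_OFFSET;
		*idx     = (val & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
		/* the tail cpu is stored +1; only meaningful when a tail is set */
		*cpu     = (val >> _Q_TAIL_CPU_OFFSET) - 1;
	}
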
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index b97a1ad..6467bfc 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -83,23 +83,97 @@ static inline struct mcs_spinlock *decode_tail(u32 tail)
        return per_cpu_ptr(&mcs_nodes[idx], cpu);
 }
 
+#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
+
+/**
+ * trylock_pending - try to acquire queue spinlock using the pending bit
+ * @lock : Pointer to queue spinlock structure
+ * @pval : Pointer to value of the queue spinlock 32-bit word
+ * Return: 1 if lock acquired, 0 otherwise
+ */
+static inline int trylock_pending(struct qspinlock *lock, u32 *pval)
+{
+       u32 old, new, val = *pval;
+
+       /*
+        * trylock || pending
+        *
+        * 0,0,0 -> 0,0,1 ; trylock
+        * 0,0,1 -> 0,1,1 ; pending
+        */
+       for (;;) {
+               /*
+                * If we observe any contention, queue.
+                */
+               if (val & ~_Q_LOCKED_MASK)
+                       return 0;
+
+               new = _Q_LOCKED_VAL;
+               if (val == new)
+                       new |= _Q_PENDING_VAL;
+
+               old = atomic_cmpxchg(&lock->val, val, new);
+               if (old == val)
+                       break;
+
+               *pval = val = old;
+       }
+
+       /*
+        * we won the trylock
+        */
+       if (new == _Q_LOCKED_VAL)
+               return 1;
+
+       /*
+        * we're pending, wait for the owner to go away.
+        *
+        * *,1,1 -> *,1,0
+        */
+       while ((val = atomic_read(&lock->val)) & _Q_LOCKED_MASK)
+               arch_mutex_cpu_relax();
+
+       /*
+        * take ownership and clear the pending bit.
+        *
+        * *,1,0 -> *,0,1
+        */
+       for (;;) {
+               new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
+
+               old = atomic_cmpxchg(&lock->val, val, new);
+               if (old == val)
+                       break;
+
+               val = old;
+       }
+       return 1;
+}
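
For illustration, the two phases above (become the single pending spinner,
then take over from the unlocking owner) can be modelled in user space with
C11 atomics. A minimal sketch under simplifying assumptions: there is no MCS
queue, so the tail bits stay zero and a contended caller simply retries; all
names are hypothetical and this is not the kernel code.

	#include <stdatomic.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <pthread.h>

	#define LOCKED	(1U << 0)	/* stands in for _Q_LOCKED_VAL  */
	#define PENDING	(1U << 8)	/* stands in for _Q_PENDING_VAL */

	static _Atomic uint32_t lock_word;

	/* Model of trylock_pending(): take the lock, or claim the one pending slot. */
	static int model_trylock_pending(void)
	{
		uint32_t old, new, val = atomic_load(&lock_word);

		for (;;) {
			if (val & ~LOCKED)	/* pending (or tail) set: would queue */
				return 0;

			new = LOCKED;
			if (val == new)		/* lock held: become the pending spinner */
				new |= PENDING;

			old = val;
			if (atomic_compare_exchange_strong(&lock_word, &old, new))
				break;
			val = old;		/* CAS failed; old holds the fresh value */
		}

		if (new == LOCKED)		/* we won the trylock outright */
			return 1;

		/* we're pending: wait for the owner to go away, *,1,1 -> *,1,0 */
		while ((val = atomic_load(&lock_word)) & LOCKED)
			;			/* cpu_relax() in the kernel */

		/* take ownership and clear the pending bit, *,1,0 -> *,0,1 */
		for (;;) {
			new = (val & ~PENDING) | LOCKED;
			old = val;
			if (atomic_compare_exchange_strong(&lock_word, &old, new))
				break;
			val = old;
		}
		return 1;
	}

	static void model_unlock(void)
	{
		atomic_fetch_sub(&lock_word, LOCKED);	/* *,*,1 -> *,*,0 */
	}

	static void *worker(void *arg)
	{
		(void)arg;
		while (!model_trylock_pending())
			;	/* a real caller would queue here instead */
		/* critical section */
		model_unlock();
		return NULL;
	}

	int main(void)
	{
		pthread_t a, b;

		pthread_create(&a, NULL, worker, NULL);
		pthread_create(&b, NULL, worker, NULL);
		pthread_join(a, NULL);
		pthread_join(b, NULL);
		printf("final lock word: %u\n", (unsigned)atomic_load(&lock_word));
		return 0;
	}

Built with e.g. "cc -std=c11 -pthread model.c"; the final lock word should
print 0.
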
+
 /**
  * queue_spin_lock_slowpath - acquire the queue spinlock
  * @lock: Pointer to queue spinlock structure
  * @val: Current value of the queue spinlock 32-bit word
  *
- * (queue tail, lock bit)
+ * (queue tail, pending bit, lock bit)
+ *
+ *              fast     :    slow                                  :    unlock
+ *                       :                                          :
+ * uncontended  (0,0,0) -:--> (0,0,1) ------------------------------:--> (*,*,0)
+ *                       :       | ^--------.------.             /  :
+ *                       :       v           \      \            |  :
+ * pending               :    (0,1,1) +--> (0,1,0)   \           |  :
+ *                       :       | ^--'              |           |  :
+ *                       :       v                   |           |  :
+ * uncontended           :    (n,x,y) +--> (n,0,0) --'           |  :
+ *   queue               :       | ^--'                          |  :
+ *                       :       v                               |  :
+ * contended             :    (*,x,y) +--> (*,0,0) ---> (*,0,1) -'  :
+ *   queue               :         ^--'                             :
  *
- *              fast      :    slow                                  :    unlock
- *                        :                                          :
- * uncontended  (0,0)   --:--> (0,1) --------------------------------:--> (*,0)
- *                        :       | ^--------.                    /  :
- *                        :       v           \                   |  :
- * uncontended            :    (n,x) --+--> (n,0)                 |  :
- *   queue                :       | ^--'                          |  :
- *                        :       v                               |  :
- * contended              :    (*,x) --+--> (*,0) -----> (*,1) ---'  :
- *   queue                :         ^--'                             :
+ * The pending bit processing is in the trylock_pending() function
+ * whereas the uncontended and contended queue processing is in the
+ * queue_spin_lock_slowpath() function.
  *
  */
 void queue_spin_lock_slowpath(struct qspinlock *lock, u32 val)
@@ -110,6 +184,9 @@ void queue_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 
        BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
 
+       if (trylock_pending(lock, &val))
+               return; /* Lock acquired */
+
        node = this_cpu_ptr(&mcs_nodes[0]);
        idx = node->count++;
        tail = encode_tail(smp_processor_id(), idx);
@@ -119,15 +196,18 @@ void queue_spin_lock_slowpath(struct qspinlock *lock, u32 val)
        node->next = NULL;
 
        /*
+        * we already touched the queueing cacheline; don't bother with pending
+        * stuff.
+        *
         * trylock || xchg(lock, node)
         *
-        * 0,0 -> 0,1 ; trylock
-        * p,x -> n,x ; prev = xchg(lock, node)
+        * 0,0,0 -> 0,0,1 ; trylock
+        * p,y,x -> n,y,x ; prev = xchg(lock, node)
         */
        for (;;) {
                new = _Q_LOCKED_VAL;
                if (val)
-                       new = tail | (val & _Q_LOCKED_MASK);
+                       new = tail | (val & _Q_LOCKED_PENDING_MASK);
 
                old = atomic_cmpxchg(&lock->val, val, new);
                if (old == val)
@@ -145,7 +225,7 @@ void queue_spin_lock_slowpath(struct qspinlock *lock, u32 val)
        /*
         * if there was a previous node; link it and wait.
         */
-       if (old & ~_Q_LOCKED_MASK) {
+       if (old & ~_Q_LOCKED_PENDING_MASK) {
                prev = decode_tail(old);
                ACCESS_ONCE(prev->next) = node;
 
@@ -153,18 +233,19 @@ void queue_spin_lock_slowpath(struct qspinlock *lock, u32 val)
        }
 
        /*
-        * we're at the head of the waitqueue, wait for the owner to go away.
+        * we're at the head of the waitqueue, wait for the owner & pending to
+        * go away.
         *
-        * *,x -> *,0
+        * *,x,y -> *,0,0
         */
-       while ((val = atomic_read(&lock->val)) & _Q_LOCKED_MASK)
+       while ((val = atomic_read(&lock->val)) & _Q_LOCKED_PENDING_MASK)
                arch_mutex_cpu_relax();
 
        /*
         * claim the lock:
         *
-        * n,0 -> 0,1 : lock, uncontended
-        * *,0 -> *,1 : lock, contended
+        * n,0,0 -> 0,0,1 : lock, uncontended
+        * *,0,0 -> *,0,1 : lock, contended
         */
        for (;;) {
                new = _Q_LOCKED_VAL;
-- 
1.7.1
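
To make the state diagram above concrete, the pending handoff between two
CPUs, A and B, runs as follows in (tail, pending, locked) notation (no
queueing involved):

	(0,0,0)  A: trylock succeeds             -> (0,0,1)
	(0,0,1)  B: sets pending, spins in-word  -> (0,1,1)
	(0,1,1)  A: unlocks                      -> (0,1,0)
	(0,1,0)  B: takes lock, clears pending   -> (0,0,1)
	(0,0,1)  B: unlocks                      -> (0,0,0)

Only a third contender would touch the second (MCS) cacheline.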
