Because the qspinlock needs to touch a second cacheline; add a pending
bit and allow a single in-word spinner before we punt to the second
cacheline.

Signed-off-by: Peter Zijlstra <pet...@infradead.org>
---
 include/asm-generic/qspinlock_types.h |   12 ++-
 kernel/locking/qspinlock.c            |  109 +++++++++++++++++++++++++++-------
 2 files changed, 97 insertions(+), 24 deletions(-)

--- a/include/asm-generic/qspinlock_types.h
+++ b/include/asm-generic/qspinlock_types.h
@@ -39,8 +39,9 @@ typedef struct qspinlock {
  * Bitfields in the atomic value:
  *
  *  0- 7: locked byte
- *  8- 9: tail index
- * 10-31: tail cpu (+1)
+ *     8: pending
+ *  9-10: tail index
+ * 11-31: tail cpu (+1)
  */
 #define        _Q_SET_MASK(type)       (((1U << _Q_ ## type ## _BITS) - 1)\
                                      << _Q_ ## type ## _OFFSET)
@@ -48,7 +49,11 @@ typedef struct qspinlock {
 #define _Q_LOCKED_BITS         8
 #define _Q_LOCKED_MASK         _Q_SET_MASK(LOCKED)
 
-#define _Q_TAIL_IDX_OFFSET     (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS)
+#define _Q_PENDING_OFFSET      (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS)
+#define _Q_PENDING_BITS                1
+#define _Q_PENDING_MASK                _Q_SET_MASK(PENDING)
+
+#define _Q_TAIL_IDX_OFFSET     (_Q_PENDING_OFFSET + _Q_PENDING_BITS)
 #define _Q_TAIL_IDX_BITS       2
 #define _Q_TAIL_IDX_MASK       _Q_SET_MASK(TAIL_IDX)
 
@@ -57,5 +62,6 @@ typedef struct qspinlock {
 #define _Q_TAIL_CPU_MASK       _Q_SET_MASK(TAIL_CPU)
 
 #define _Q_LOCKED_VAL          (1U << _Q_LOCKED_OFFSET)
+#define _Q_PENDING_VAL         (1U << _Q_PENDING_OFFSET)
 
 #endif /* __ASM_GENERIC_QSPINLOCK_TYPES_H */
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -83,24 +83,28 @@ static inline struct mcs_spinlock *decod
        return per_cpu_ptr(&mcs_nodes[idx], cpu);
 }
 
+#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
+
 /**
  * queue_spin_lock_slowpath - acquire the queue spinlock
  * @lock: Pointer to queue spinlock structure
  * @val: Current value of the queue spinlock 32-bit word
  *
- * (queue tail, lock bit)
- *
- *              fast      :    slow                                  :    
unlock
- *                        :                                          :
- * uncontended  (0,0)   --:--> (0,1) --------------------------------:--> (*,0)
- *                        :       | ^--------.                    /  :
- *                        :       v           \                   |  :
- * uncontended            :    (n,x) --+--> (n,0)                 |  :
- *   queue                :       | ^--'                          |  :
- *                        :       v                               |  :
- * contended              :    (*,x) --+--> (*,0) -----> (*,1) ---'  :
- *   queue                :         ^--'                             :
+ * (queue tail, pending bit, lock bit)
  *
+ *              fast     :    slow                                  :    unlock
+ *                       :                                          :
+ * uncontended  (0,0,0) -:--> (0,0,1) ------------------------------:--> 
(*,*,0)
+ *                       :       | ^--------.------.             /  :
+ *                       :       v           \      \            |  :
+ * pending               :    (0,1,1) +--> (0,1,0)   \           |  :
+ *                       :       | ^--'              |           |  :
+ *                       :       v                   |           |  :
+ * uncontended           :    (n,x,y) +--> (n,0,0) --'           |  :
+ *   queue               :       | ^--'                          |  :
+ *                       :       v                               |  :
+ * contended             :    (*,x,y) +--> (*,0,0) ---> (*,0,1) -'  :
+ *   queue               :         ^--'                             :
  */
 void queue_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 {
@@ -110,6 +114,65 @@ void queue_spin_lock_slowpath(struct qsp
 
        BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
 
+       /*
+        * trylock || pending
+        *
+        * 0,0,0 -> 0,0,1 ; trylock
+        * 0,0,1 -> 0,1,1 ; pending
+        */
+       for (;;) {
+               /*
+                * If we observe any contention; queue.
+                */
+               if (val & ~_Q_LOCKED_MASK)
+                       goto queue;
+
+               new = _Q_LOCKED_VAL;
+               if (val == new)
+                       new |= _Q_PENDING_VAL;
+
+               old = atomic_cmpxchg(&lock->val, val, new);
+               if (old == val)
+                       break;
+
+               val = old;
+       }
+
+       /*
+        * we won the trylock
+        */
+       if (new == _Q_LOCKED_VAL)
+               return;
+
+       /*
+        * we're pending, wait for the owner to go away.
+        *
+        * *,1,1 -> *,1,0
+        */
+       while ((val = atomic_read(&lock->val)) & _Q_LOCKED_MASK)
+               cpu_relax();
+
+       /*
+        * take ownership and clear the pending bit.
+        *
+        * *,1,0 -> *,0,1
+        */
+       for (;;) {
+               new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
+
+               old = atomic_cmpxchg(&lock->val, val, new);
+               if (old == val)
+                       break;
+
+               val = old;
+       }
+       return;
+
+       /*
+        * End of pending bit optimistic spinning and beginning of MCS
+        * queuing.
+        */
+queue:
        node = this_cpu_ptr(&mcs_nodes[0]);
        idx = node->count++;
        tail = encode_tail(smp_processor_id(), idx);
@@ -119,15 +182,18 @@ void queue_spin_lock_slowpath(struct qsp
        node->next = NULL;
 
        /*
+        * we already touched the queueing cacheline; don't bother with pending
+        * stuff.
+        *
         * trylock || xchg(lock, node)
         *
-        * 0,0 -> 0,1 ; trylock
-        * p,x -> n,x ; prev = xchg(lock, node)
+        * 0,0,0 -> 0,0,1 ; trylock
+        * p,y,x -> n,y,x ; prev = xchg(lock, node)
         */
        for (;;) {
                new = _Q_LOCKED_VAL;
                if (val)
-                       new = tail | (val & _Q_LOCKED_MASK);
+                       new = tail | (val & _Q_LOCKED_PENDING_MASK);
 
                old = atomic_cmpxchg(&lock->val, val, new);
                if (old == val)
@@ -145,7 +211,7 @@ void queue_spin_lock_slowpath(struct qsp
        /*
         * if there was a previous node; link it and wait.
         */
-       if (old & ~_Q_LOCKED_MASK) {
+       if (old & ~_Q_LOCKED_PENDING_MASK) {
                prev = decode_tail(old);
                ACCESS_ONCE(prev->next) = node;
 
@@ -153,18 +219,19 @@ void queue_spin_lock_slowpath(struct qsp
        }
 
        /*
-        * we're at the head of the waitqueue, wait for the owner to go away.
+        * we're at the head of the waitqueue, wait for the owner & pending to
+        * go away.
         *
-        * *,x -> *,0
+        * *,x,y -> *,0,0
         */
-       while ((val = atomic_read(&lock->val)) & _Q_LOCKED_MASK)
+       while ((val = atomic_read(&lock->val)) & _Q_LOCKED_PENDING_MASK)
                cpu_relax();
 
        /*
         * claim the lock:
         *
-        * n,0 -> 0,1 : lock, uncontended
-        * *,0 -> *,1 : lock, contended
+        * n,0,0 -> 0,0,1 : lock, uncontended
+        * *,0,0 -> *,0,1 : lock, contended
         */
        for (;;) {
                new = _Q_LOCKED_VAL;


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to