Most of the vCPU kicks are done on the locking side, where the new lock holder wakes up the queue head vCPU to spin on the lock. However, there are situations where it may be advantageous to defer the vCPU kick to the time when the lock holder releases the lock.
This patch enables the deferment of vCPU kicking to the unlock function by adding a new vCPU state (vcpu_hashed) to mark the fact that 1) _Q_SLOW_VAL is set in the lock, and 2) the pv_node address is stored in the hash table.

This enablement patch, by itself, should not change the performance of the pvqspinlock code. Actual deferment of vCPU kicks will be added in a later patch.

Signed-off-by: Waiman Long <[email protected]>
---
 kernel/locking/qspinlock.c          |    6 +++---
 kernel/locking/qspinlock_paravirt.h |   34 ++++++++++++++++++++++++----------
 2 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 6518ee9..94fdd27 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -259,8 +259,8 @@ static __always_inline void set_locked(struct qspinlock *lock)
 
 static __always_inline void __pv_init_node(struct mcs_spinlock *node) { }
 static __always_inline void __pv_wait_node(struct mcs_spinlock *node) { }
-static __always_inline void __pv_kick_node(struct mcs_spinlock *node) { }
-
+static __always_inline void __pv_kick_node(struct qspinlock *lock,
+					   struct mcs_spinlock *node) { }
 static __always_inline void __pv_wait_head(struct qspinlock *lock,
 					   struct mcs_spinlock *node) { }
 
@@ -464,7 +464,7 @@ queue:
 		cpu_relax();
 
 	arch_mcs_spin_unlock_contended(&next->locked);
-	pv_kick_node(next);
+	pv_kick_node(lock, next);
 
 release:
 	/*
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 5efcc65..5e140fe 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -33,6 +33,7 @@
 enum vcpu_state {
 	vcpu_running = 0,
 	vcpu_halted,
+	vcpu_hashed,	/* vcpu_halted + node stored in hash table */
 };
 
 struct pv_node {
@@ -406,13 +407,17 @@ static void pv_wait_node(struct mcs_spinlock *node)
 			pv_wait(&pn->state, vcpu_halted);
 		}
 
+		if (READ_ONCE(node->locked))
+			break;
+
 		/*
-		 * Reset the vCPU state to avoid unncessary CPU kicking
+		 * Reset the vCPU state to running to avoid unncessary CPU
+		 * kicking unless vcpu_hashed had already been set. In this
+		 * case, node->locked should have just been set, and we
+		 * aren't going to set state to vcpu_halted again.
 		 */
-		WRITE_ONCE(pn->state, vcpu_running);
+		cmpxchg(&pn->state, vcpu_halted, vcpu_running);
 
-		if (READ_ONCE(node->locked))
-			break;
 		/*
 		 * If the locked flag is still not set after wakeup, it is a
 		 * spurious wakeup and the vCPU should wait again. However,
@@ -431,12 +436,16 @@
 
 /*
  * Called after setting next->locked = 1, used to wake those stuck in
- * pv_wait_node().
+ * pv_wait_node(). Alternatively, it can also defer the kicking to the
+ * unlock function.
  */
-static void pv_kick_node(struct mcs_spinlock *node)
+static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
 {
 	struct pv_node *pn = (struct pv_node *)node;
 
+	if (xchg(&pn->state, vcpu_running) != vcpu_halted)
+		return;
+
 	/*
 	 * Note that because node->locked is already set, this actual
 	 * mcs_spinlock entry could be re-used already.
@@ -446,10 +455,8 @@
 	 *
 	 * See the comment in pv_wait_node().
 	 */
-	if (xchg(&pn->state, vcpu_running) == vcpu_halted) {
-		pvstat_inc(pvstat_lock_kick);
-		pv_kick(pn->cpu);
-	}
+	pvstat_inc(pvstat_lock_kick);
+	pv_kick(pn->cpu);
 }
 
 /*
@@ -471,6 +478,13 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
 			cpu_relax();
 		}
 
+		if (!lp && (xchg(&pn->state, vcpu_hashed) == vcpu_hashed))
+			/*
+			 * The hashed table & _Q_SLOW_VAL had been filled
+			 * by the lock holder.
+			 */
+			lp = (struct qspinlock **)-1;
+
 		if (!lp) {	/* ONCE */
 			lp = pv_hash(lock, pn);
 			/*
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

