Most of the vCPU kicks are done on the locking side, where the new lock holder wakes up the queue head vCPU to spin on the lock. However, there are situations where it may be advantageous to defer the vCPU kick to the time when the lock holder releases the lock.
This patch enables the deferment of vCPU kicking to the unlock function by adding a new vCPU state (vcpu_hashed) to mark the fact that 1) _Q_SLOW_VAL is set in the lock, and 2) the pv_node address is stored in the hash table.

This enablement patch, by itself, should not change the performance of the pvqspinlock code. Actual deferment of vCPU kicks will be added in a later patch.

Signed-off-by: Waiman Long <[email protected]>
---
 kernel/locking/qspinlock.c          |    6 +++---
 kernel/locking/qspinlock_paravirt.h |   34 ++++++++++++++++++++++++----------
 2 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 6518ee9..94fdd27 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -259,8 +259,8 @@ static __always_inline void set_locked(struct qspinlock *lock)
 
 static __always_inline void __pv_init_node(struct mcs_spinlock *node) { }
 static __always_inline void __pv_wait_node(struct mcs_spinlock *node) { }
-static __always_inline void __pv_kick_node(struct mcs_spinlock *node) { }
-
+static __always_inline void __pv_kick_node(struct qspinlock *lock,
+					   struct mcs_spinlock *node) { }
 static __always_inline void __pv_wait_head(struct qspinlock *lock,
 					   struct mcs_spinlock *node) { }
 
@@ -464,7 +464,7 @@ queue:
 		cpu_relax();
 
 	arch_mcs_spin_unlock_contended(&next->locked);
-	pv_kick_node(next);
+	pv_kick_node(lock, next);
 
 release:
 	/*
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 5efcc65..5e140fe 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -33,6 +33,7 @@
 enum vcpu_state {
 	vcpu_running = 0,
 	vcpu_halted,
+	vcpu_hashed,	/* vcpu_halted + node stored in hash table */
 };
 
 struct pv_node {
@@ -406,13 +407,17 @@ static void pv_wait_node(struct mcs_spinlock *node)
 			pv_wait(&pn->state, vcpu_halted);
 		}
 
+		if (READ_ONCE(node->locked))
+			break;
+
 		/*
-		 * Reset the vCPU state to avoid unncessary CPU kicking
+		 * Reset the vCPU state to running to avoid unncessary CPU
+		 * kicking unless vcpu_hashed had already been set. In this
+		 * case, node->locked should have just been set, and we
+		 * aren't going to set state to vcpu_halted again.
 		 */
-		WRITE_ONCE(pn->state, vcpu_running);
+		cmpxchg(&pn->state, vcpu_halted, vcpu_running);
 
-		if (READ_ONCE(node->locked))
-			break;
 		/*
 		 * If the locked flag is still not set after wakeup, it is a
 		 * spurious wakeup and the vCPU should wait again. However,
@@ -431,12 +436,16 @@
 
 /*
  * Called after setting next->locked = 1, used to wake those stuck in
- * pv_wait_node().
+ * pv_wait_node(). Alternatively, it can also defer the kicking to the
+ * unlock function.
  */
-static void pv_kick_node(struct mcs_spinlock *node)
+static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
 {
 	struct pv_node *pn = (struct pv_node *)node;
 
+	if (xchg(&pn->state, vcpu_running) != vcpu_halted)
+		return;
+
 	/*
 	 * Note that because node->locked is already set, this actual
 	 * mcs_spinlock entry could be re-used already.
@@ -446,10 +455,8 @@
 	 *
 	 * See the comment in pv_wait_node().
 	 */
-	if (xchg(&pn->state, vcpu_running) == vcpu_halted) {
-		pvstat_inc(pvstat_lock_kick);
-		pv_kick(pn->cpu);
-	}
+	pvstat_inc(pvstat_lock_kick);
+	pv_kick(pn->cpu);
 }
 
 /*
@@ -471,6 +478,13 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
 			cpu_relax();
 		}
 
+		if (!lp && (xchg(&pn->state, vcpu_hashed) == vcpu_hashed))
+			/*
+			 * The hashed table & _Q_SLOW_VAL had been filled
+			 * by the lock holder.
+			 */
+			lp = (struct qspinlock **)-1;
+
 		if (!lp) {	/* ONCE */
 			lp = pv_hash(lock, pn);
 			/*
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

