Hello Waiman,

I tried your patch, thanks!

I have also made an improvement on top of it: pv_wait_node() now takes the
lock so that, before halting, it can look the lock up in the PV hash table
(the node hashed there should be the queue head) and pass that node's
prev_cpu, which is likely the lock holder, to pv_wait(). When the lookup
finds nothing, it falls back to the previous node's prev_cpu.
The diff below has been tested and works for me. :)

diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index ce2f75e..99f31e4 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -248,7 +248,8 @@ static __always_inline void set_locked(struct qspinlock *lock)
  */
 
 static __always_inline void __pv_init_node(struct mcs_spinlock *node) { }
-static __always_inline void __pv_wait_node(struct mcs_spinlock *node,
+static __always_inline void __pv_wait_node(struct qspinlock *lock,
+                                          struct mcs_spinlock *node,
                                           struct mcs_spinlock *prev) { }
 static __always_inline void __pv_kick_node(struct qspinlock *lock,
                                           struct mcs_spinlock *node) { }
@@ -407,7 +408,7 @@ queue:
                prev = decode_tail(old);
                WRITE_ONCE(prev->next, node);
 
-               pv_wait_node(node, prev);
+               pv_wait_node(lock, node, prev);
                arch_mcs_spin_lock_contended(&node->locked);
 
                /*
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 01a6d16..75ccfd3 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -255,6 +257,19 @@ static struct pv_node *pv_unhash(struct qspinlock *lock)
        BUG();
 }
 
+static struct pv_node *pv_hash_lookup(struct qspinlock *lock)
+{
+       unsigned long offset, hash = hash_ptr(lock, pv_lock_hash_bits);
+       struct pv_hash_entry *he;
+
+       for_each_hash_entry(he, offset, hash) {
+               if (READ_ONCE(he->lock) == lock) {
+                       return he->node;
+               }
+       }
+       return NULL;
+}
+
 /*
  * Return true if when it is time to check the previous node which is not
  * in a running state.
@@ -287,14 +303,17 @@ static void pv_init_node(struct mcs_spinlock *node)
  * pv_kick_node() is used to set _Q_SLOW_VAL and fill in hash table on its
  * behalf.
  */
-static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
+static void pv_wait_node(struct qspinlock *lock, struct mcs_spinlock *node,
+                       struct mcs_spinlock *prev)
 {
        struct pv_node *pn = (struct pv_node *)node;
        struct pv_node *pp = (struct pv_node *)prev;
+       struct pv_node *ph;
        int waitcnt = 0;
        int loop;
        bool wait_early;
 
+       pn->prev_cpu = pp->cpu;
        /* waitcnt processing will be compiled out if !QUEUED_LOCK_STAT */
        for (;; waitcnt++) {
                for (wait_early = false, loop = SPIN_THRESHOLD; loop; loop--) {
@@ -322,7 +341,10 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
                        qstat_inc(qstat_pv_wait_node, true);
                        qstat_inc(qstat_pv_wait_again, waitcnt);
                        qstat_inc(qstat_pv_wait_early, wait_early);
-                       pv_wait(&pn->state, vcpu_halted);
+                       ph = pv_hash_lookup(lock);
+                       if (!ph)
+                               ph = pp;
+                       pv_wait(&pn->state, vcpu_halted, ph->prev_cpu);
                }
 
                /*

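For reference, here is a rough sketch of how a pv_wait() backend might consume
the new lockcpu hint on the PPC side. This is not the actual port, just
hypothetical pseries-style code modeled on the existing __spin_yield() in
arch/powerpc/lib/locks.c; the function name and the bail-out paths are my own
assumptions.

/*
 * Hypothetical sketch only, not the real PPC port (needs asm/lppaca.h,
 * asm/hvcall.h and asm/smp.h on pseries).  If the likely lock holder
 * vCPU is known and currently preempted, confer our time slice to it
 * instead of spinning or halting blindly.
 */
static void ppc_qlock_wait(u8 *ptr, u8 val, int lockcpu)
{
	u32 yield_count;

	if (READ_ONCE(*ptr) != val)
		return;		/* value already changed, no need to wait */

	if (lockcpu < 0 || lockcpu >= nr_cpu_ids)
		return;		/* no hint, caller will spin again or cede */

	yield_count = be32_to_cpu(lppaca_of(lockcpu).yield_count);
	if ((yield_count & 1) == 0)
		return;		/* holder vCPU is running, keep spinning */

	/* Holder vCPU is preempted, give our cycles to it. */
	plpar_hcall_norets(H_CONFER,
			   get_hard_smp_processor_id(lockcpu), yield_count);
}

With a backend like that, the ph->prev_cpu passed from pv_wait_node() above
tells the hypervisor which vCPU to favour; x86 simply ignores the extra
argument.
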
Any comments are welcome. I have put my patch here just for simplicity, or
should I send it out in a new thread?

thanks
xinhui

On 2016-04-14 17:36, Pan Xinhui wrote:
> 
> 
> On 2016-04-14 04:59, Waiman Long wrote:
>> Pan Xinhui was asking for a lock holder cpu argument in pv_wait()
>> to help the porting of pvqspinlock to PPC. The new argument can
>> potentially help the hypervisor expedite the execution of the critical
>> section so that the lock holder vCPU can release the lock sooner.
>>
>> This patch does just that by storing the previous node vCPU number.
>> In pv_wait_head_or_lock(), pv_wait() will be called with that vCPU
>> number as it is likely to be the lock holder. In pv_wait_node(),
>> -1 will be passed to pv_wait() instead to indicate that it doesn't
>> know what the current lock holder is.
>>
>> This patch introduces negligible overhead to the current pvqspinlock
>> code. The extra lockcpu argument isn't currently used in x86
>> architecture.
>>
>> Signed-off-by: Waiman Long <waiman.l...@hpe.com>
>> ---
>>  arch/x86/include/asm/paravirt.h       |    4 ++--
>>  arch/x86/include/asm/paravirt_types.h |    2 +-
>>  arch/x86/kernel/kvm.c                 |    2 +-
>>  arch/x86/xen/spinlock.c               |    2 +-
>>  kernel/locking/qspinlock_paravirt.h   |   19 +++++++++++++++----
>>  kernel/locking/qspinlock_stat.h       |    8 ++++----
>>  6 files changed, 24 insertions(+), 13 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/paravirt.h 
>> b/arch/x86/include/asm/paravirt.h
>> index 601f1b8..b89eccf 100644
>> --- a/arch/x86/include/asm/paravirt.h
>> +++ b/arch/x86/include/asm/paravirt.h
>> @@ -676,9 +676,9 @@ static __always_inline void pv_queued_spin_unlock(struct 
>> qspinlock *lock)
>>      PVOP_VCALLEE1(pv_lock_ops.queued_spin_unlock, lock);
>>  }
>>
>> -static __always_inline void pv_wait(u8 *ptr, u8 val)
>> +static __always_inline void pv_wait(u8 *ptr, u8 val, int lockcpu)
>>  {
>> -    PVOP_VCALL2(pv_lock_ops.wait, ptr, val);
>> +    PVOP_VCALL3(pv_lock_ops.wait, ptr, val, lockcpu);
>>  }
>>
>>  static __always_inline void pv_kick(int cpu)
>> diff --git a/arch/x86/include/asm/paravirt_types.h 
>> b/arch/x86/include/asm/paravirt_types.h
>> index e8c2326..2fc26c1 100644
>> --- a/arch/x86/include/asm/paravirt_types.h
>> +++ b/arch/x86/include/asm/paravirt_types.h
>> @@ -312,7 +312,7 @@ struct pv_lock_ops {
>>      void (*queued_spin_lock_slowpath)(struct qspinlock *lock, u32 val);
>>      struct paravirt_callee_save queued_spin_unlock;
>>
>> -    void (*wait)(u8 *ptr, u8 val);
>> +    void (*wait)(u8 *ptr, u8 val, int lockcpu);
>>      void (*kick)(int cpu);
>>  #else /* !CONFIG_QUEUED_SPINLOCKS */
>>      struct paravirt_callee_save lock_spinning;
>> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
>> index dc1207e..47ab4e1 100644
>> --- a/arch/x86/kernel/kvm.c
>> +++ b/arch/x86/kernel/kvm.c
>> @@ -590,7 +590,7 @@ static void kvm_kick_cpu(int cpu)
>>
>>  #include <asm/qspinlock.h>
>>
>> -static void kvm_wait(u8 *ptr, u8 val)
>> +static void kvm_wait(u8 *ptr, u8 val, int lockcpu)
>>  {
>>      unsigned long flags;
>>
>> diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
>> index 9e2ba5c..6f78c41 100644
>> --- a/arch/x86/xen/spinlock.c
>> +++ b/arch/x86/xen/spinlock.c
>> @@ -33,7 +33,7 @@ static void xen_qlock_kick(int cpu)
>>  /*
>>   * Halt the current CPU & release it back to the host
>>   */
>> -static void xen_qlock_wait(u8 *byte, u8 val)
>> +static void xen_qlock_wait(u8 *byte, u8 val, int lockcpu)
>>  {
>>      int irq = __this_cpu_read(lock_kicker_irq);
>>
>> diff --git a/kernel/locking/qspinlock_paravirt.h 
>> b/kernel/locking/qspinlock_paravirt.h
>> index 21ede57..4bec98b 100644
>> --- a/kernel/locking/qspinlock_paravirt.h
>> +++ b/kernel/locking/qspinlock_paravirt.h
>> @@ -51,6 +51,7 @@ struct pv_node {
>>      struct mcs_spinlock     __res[3];
>>
>>      int                     cpu;
>> +    int                     prev_cpu;       /* Previous node cpu */
>>      u8                      state;
>>  };
>>
>> @@ -156,8 +157,7 @@ static __always_inline int trylock_clear_pending(struct 
>> qspinlock *lock)
>>   * 256 (64-bit) or 512 (32-bit) to fully utilize a 4k page.
>>   *
>>   * Since we should not be holding locks from NMI context (very rare indeed) 
>> the
>> - * max load factor is 0.75, which is around the point where open addressing
>> - * breaks down.
>> + * max load factor is 0.75.
>>   *
>>   */
>>  struct pv_hash_entry {
>> @@ -275,6 +275,7 @@ static void pv_init_node(struct mcs_spinlock *node)
>>
>>      pn->cpu = smp_processor_id();
>>      pn->state = vcpu_running;
>> +    pn->prev_cpu = -1;
>>  }
>>
>>  /*
>> @@ -290,6 +291,8 @@ static void pv_wait_node(struct mcs_spinlock *node, 
>> struct mcs_spinlock *prev)
>>      int loop;
>>      bool wait_early;
>>
>> +    pn->prev_cpu = pp->cpu; /* Save previous node vCPU */
>> +
>>      /* waitcnt processing will be compiled out if !QUEUED_LOCK_STAT */
>>      for (;; waitcnt++) {
>>              for (wait_early = false, loop = SPIN_THRESHOLD; loop; loop--) {
>> @@ -317,7 +320,7 @@ static void pv_wait_node(struct mcs_spinlock *node, 
>> struct mcs_spinlock *prev)
>>                      qstat_inc(qstat_pv_wait_node, true);
>>                      qstat_inc(qstat_pv_wait_again, waitcnt);
>>                      qstat_inc(qstat_pv_wait_early, wait_early);
>> -                    pv_wait(&pn->state, vcpu_halted);
>> +                    pv_wait(&pn->state, vcpu_halted, -1);
> If contention is high, we might end up here, and in that scenario we do
> need the lock holder. How about this: we store the lock in the pv_node,
> then search for the lock in the hash table. The code might look like:
>       node = pv_hash_lookup(pn->lock);
>       pv_wait(...,node->holder);
> thanks
> xinhui
>>              }
>>
>>              /*
>> @@ -453,7 +456,15 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct 
>> mcs_spinlock *node)
>>              WRITE_ONCE(pn->state, vcpu_halted);
>>              qstat_inc(qstat_pv_wait_head, true);
>>              qstat_inc(qstat_pv_wait_again, waitcnt);
>> -            pv_wait(&l->locked, _Q_SLOW_VAL);
>> +
>> +            /*
>> +             * Pass in the previous node vCPU number which is likely to be
>> +             * the lock holder vCPU. This additional information may help
>> +             * the hypervisor to give more resource to that vCPU so that
>> +             * it can release the lock faster. With lock stealing,
>> +             * however, that vCPU may not be the actual lock holder.
>> +             */
>> +            pv_wait(&l->locked, _Q_SLOW_VAL, pn->prev_cpu);
>>
>>              /*
>>               * The unlocker should have freed the lock before kicking the
>> diff --git a/kernel/locking/qspinlock_stat.h 
>> b/kernel/locking/qspinlock_stat.h
>> index eb2a2c9..8728348 100644
>> --- a/kernel/locking/qspinlock_stat.h
>> +++ b/kernel/locking/qspinlock_stat.h
>> @@ -266,12 +266,12 @@ static inline void __pv_kick(int cpu)
>>  /*
>>   * Replacement function for pv_wait()
>>   */
>> -static inline void __pv_wait(u8 *ptr, u8 val)
>> +static inline void __pv_wait(u8 *ptr, u8 val, int lockcpu)
>>  {
>>      u64 *pkick_time = this_cpu_ptr(&pv_kick_time);
>>
>>      *pkick_time = 0;
>> -    pv_wait(ptr, val);
>> +    pv_wait(ptr, val, lockcpu);
>>      if (*pkick_time) {
>>              this_cpu_add(qstats[qstat_pv_latency_wake],
>>                           sched_clock() - *pkick_time);
>> @@ -279,8 +279,8 @@ static inline void __pv_wait(u8 *ptr, u8 val)
>>      }
>>  }
>>
>> -#define pv_kick(c)  __pv_kick(c)
>> -#define pv_wait(p, v)       __pv_wait(p, v)
>> +#define pv_kick(c)          __pv_kick(c)
>> +#define pv_wait(p, v, c)    __pv_wait(p, v, c)
>>
>>  #else /* CONFIG_QUEUED_LOCK_STAT */
>>
> 
