On Thu, 2015-03-12 at 18:55 +0800, Kevin Hao wrote:
> I know Torsten Duwe tried to add ticket spinlocks for powerpc a year
> ago [1], but that made no progress due to the conflict between
> PPC_SPLPAR and lockref, and we still haven't found a good way to
> handle it. Instead of waiting forever for a perfect solution, can't
> we just use ticket spinlocks for the !CONFIG_PPC_SPLPAR case?
> 
> This is a very rough patch based on the arm64 code. I want to make
> sure this approach is acceptable before going any further. It has
> passed build and boot tests on an fsl t4240rdb board. I have done a
> simple performance benchmark by running the following command ten
> times before and after applying this patch:
>       ./perf bench sched messaging
> 
>                                 Before        After
>     Averaged total time [sec]:  0.403         0.367
> 
> So we see a ~9% performance improvement. This patch depends on this
> one [2].

I would do the ifdef'ing differently, something like

CONFIG_PPC_HAS_LOCK_OWNER

CONFIG_PPC_TICKET_LOCKS depends on !PPC_HAS_LOCK_OWNER

and use these two in the code... with SPLPAR select'ing HAS_LOCK_OWNER
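
Roughly something like this (untested, the names are only meant to
illustrate the idea):

	config PPC_HAS_LOCK_OWNER
		bool

	config PPC_TICKET_LOCKS
		def_bool y
		depends on !PPC_HAS_LOCK_OWNER

	config PPC_SPLPAR
		...
		select PPC_HAS_LOCK_OWNER

That keeps the "can we spin on a lock owner" property separate from
the platform, so spinlock.h doesn't have to test SPLPAR directly.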

> [1] https://lists.ozlabs.org/pipermail/linuxppc-dev/2014-February/115195.html
> [2] http://patchwork.ozlabs.org/patch/447563/
> 
> Signed-off-by: Kevin Hao <haoke...@gmail.com>
> ---
>  arch/powerpc/include/asm/spinlock.h       | 79 ++++++++++++++++++++++++++++++-
>  arch/powerpc/include/asm/spinlock_types.h | 16 +++++++
>  arch/powerpc/lib/locks.c                  |  2 +-
>  3 files changed, 95 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
> index d303cdad2519..3faf2507abe9 100644
> --- a/arch/powerpc/include/asm/spinlock.h
> +++ b/arch/powerpc/include/asm/spinlock.h
> @@ -54,6 +54,7 @@
>  #define SYNC_IO
>  #endif
>  
> +#ifdef CONFIG_PPC_SPLPAR
>  static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
>  {
>       return lock.slock == 0;
> @@ -89,6 +90,40 @@ static inline unsigned long __arch_spin_trylock(arch_spinlock_t *lock)
>       return tmp;
>  }
>  
> +#else
> +static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
> +{
> +     return lock.owner == lock.next;
> +}
> +
> +static inline int arch_spin_is_locked(arch_spinlock_t *lock)
> +{
> +     return !arch_spin_value_unlocked(READ_ONCE(*lock));
> +}
> +
> +static inline unsigned long __arch_spin_trylock(arch_spinlock_t *lock)
> +{
> +     unsigned int tmp;
> +     arch_spinlock_t lockval;
> +
> +     __asm__ __volatile__ (
> +"1:  " PPC_LWARX(%0,0,%2,1) "\n\
> +     rotlwi          %1,%0,16\n\
> +     xor.            %1,%1,%0\n\
> +     bne-            2f\n\
> +     add             %0,%0,%3\n\
> +     stwcx.          %0,0,%2\n\
> +     bne-            1b\n"
> +     PPC_ACQUIRE_BARRIER
> +"2:"
> +     : "=&r" (lockval), "=&r" (tmp)
> +     : "r" (lock), "r" (1 << TICKET_SHIFT)
> +     : "cr0", "memory");
> +
> +     return tmp;
> +}
> +#endif
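
For reference, the C equivalent of the trylock above is roughly the
following (a sketch only -- the real atomicity comes from the
lwarx/stwcx. reservation, and the ordering from PPC_ACQUIRE_BARRIER):

	unsigned long __arch_spin_trylock(arch_spinlock_t *lock)
	{
		arch_spinlock_t old = READ_ONCE(*lock);

		/*
		 * rotlwi+xor.: rotating the 32-bit word by 16 swaps
		 * the two halves, so the xor is zero iff next == owner,
		 * i.e. the lock is free.
		 */
		if (old.next != old.owner)
			return 1;	/* held, caller sees non-zero */

		lock->next++;		/* the "add 1 << TICKET_SHIFT" */
		return 0;		/* 0 == success, as before */
	}
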
> +
>  static inline int arch_spin_trylock(arch_spinlock_t *lock)
>  {
>       CLEAR_IO_SYNC;
> @@ -120,6 +155,7 @@ extern void __rw_yield(arch_rwlock_t *lock);
>  #define SHARED_PROCESSOR     0
>  #endif
>  
> +#ifdef CONFIG_PPC_SPLPAR
>  static inline void arch_spin_lock(arch_spinlock_t *lock)
>  {
>       CLEAR_IO_SYNC;
> @@ -155,16 +191,57 @@ void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
>               local_irq_restore(flags_dis);
>       }
>  }
> +#else
> +#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
> +
> +static inline int arch_spin_is_contended(arch_spinlock_t *lock)
> +{
> +     arch_spinlock_t lockval = READ_ONCE(*lock);
> +     return (lockval.next - lockval.owner) > 1;
> +}
> +#define arch_spin_is_contended       arch_spin_is_contended
> +
> +static inline void arch_spin_lock(arch_spinlock_t *lock)
> +{
> +     unsigned int tmp;
> +     arch_spinlock_t lockval;
> +
> +     CLEAR_IO_SYNC;
> +     __asm__ __volatile__ (
> +"1:  " PPC_LWARX(%0,0,%2,1) "\n\
> +     add             %1,%0,%4\n\
> +     stwcx.          %1,0,%2\n\
> +     bne-            1b\n\
> +     rotlwi          %1,%0,16\n\
> +     cmpw            %1,%0\n\
> +     beq             3f\n\
> +     rlwinm          %0,%0,16,16,31\n\
> +2:   or              1,1,1\n\
> +     lhz             %1,0(%3)\n\
> +     cmpw            %1,%0\n\
> +     bne             2b\n\
> +     or              2,2,2\n\
> +3:"
> +     PPC_ACQUIRE_BARRIER
> +     : "=&r" (lockval), "=&r" (tmp)
> +     : "r"(lock), "r" (&lock->owner), "r" (1 << TICKET_SHIFT)
> +     : "cr0", "memory");
> +}
> +#endif
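
The or 1,1,1 / or 2,2,2 pair lowers and then restores the SMT thread
priority while we spin on the owner field, so a waiter doesn't starve
its hardware siblings. In C the whole sequence is roughly this (a
sketch; the ticket grab is really the lwarx/stwcx. loop above):

	void arch_spin_lock(arch_spinlock_t *lock)
	{
		/* atomically take a ticket; mine is the old 'next' */
		u16 my_ticket = lock->next++;

		/* spin at low SMT priority until our number comes up */
		while (READ_ONCE(lock->owner) != my_ticket)
			cpu_relax();

		/* PPC_ACQUIRE_BARRIER before entering the section */
	}
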
>  
>  static inline void arch_spin_unlock(arch_spinlock_t *lock)
>  {
>       SYNC_IO;
>       __asm__ __volatile__("# arch_spin_unlock\n\t"
>                               PPC_RELEASE_BARRIER: : :"memory");
> +#ifdef CONFIG_PPC_SPLPAR
>       lock->slock = 0;
> +#else
> +     lock->owner++;
> +#endif
>  }
>  
> -#ifdef CONFIG_PPC64
> +#ifdef CONFIG_PPC_SPLPAR
>  extern void arch_spin_unlock_wait(arch_spinlock_t *lock);
>  #else
>  #define arch_spin_unlock_wait(lock) \
> diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h
> index 2351adc4fdc4..1af94f290363 100644
> --- a/arch/powerpc/include/asm/spinlock_types.h
> +++ b/arch/powerpc/include/asm/spinlock_types.h
> @@ -5,11 +5,27 @@
>  # error "please don't include this file directly"
>  #endif
>  
> +#ifdef CONFIG_PPC_SPLPAR
>  typedef struct {
>       volatile unsigned int slock;
>  } arch_spinlock_t;
>  
>  #define __ARCH_SPIN_LOCK_UNLOCKED    { 0 }
> +#else
> +#define TICKET_SHIFT 16
> +
> +typedef struct {
> +#ifdef __BIG_ENDIAN__
> +     u16 next;
> +     u16 owner;
> +#else
> +     u16 owner;
> +     u16 next;
> +#endif
> +} __aligned(4) arch_spinlock_t;
> +
> +#define __ARCH_SPIN_LOCK_UNLOCKED    { 0 , 0 }
> +#endif /*CONFIG_PPC_SPLPAR*/
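
One subtlety that may deserve a comment in the code: with this layout
'next' ends up in the upper 16 bits of the word on both endiannesses
(first field on big endian, higher-addressed field on little endian),
which is what lets the asm bump it with a plain add of
1 << TICKET_SHIFT without touching owner. A hypothetical view:

	/* the same 4 bytes that lwarx/stwcx. operate on */
	union {
		arch_spinlock_t lock;
		u32 word;	/* word >> TICKET_SHIFT == lock.next */
	} tickets;
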
>  
>  typedef struct {
>       volatile signed int lock;
> diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
> index 170a0346f756..fe3d21eeb10d 100644
> --- a/arch/powerpc/lib/locks.c
> +++ b/arch/powerpc/lib/locks.c
> @@ -66,7 +66,6 @@ void __rw_yield(arch_rwlock_t *rw)
>       plpar_hcall_norets(H_CONFER,
>               get_hard_smp_processor_id(holder_cpu), yield_count);
>  }
> -#endif
>  
>  void arch_spin_unlock_wait(arch_spinlock_t *lock)
>  {
> @@ -83,3 +82,4 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock)
>  }
>  
>  EXPORT_SYMBOL(arch_spin_unlock_wait);
> +#endif

