From: Alexander Sverdlin <alexander.sverd...@nokia.com>

It makes no sense to fold in smp_mb__before_llsc()/smp_llsc_mb() again
and again; keep only one barrier pair in the outer function.

This removes one SYNCW from __xchg_small() and yields around a 10%
performance improvement in a tight spinlock loop with 6 threads on a
6-core Octeon.
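
For reference, the outer wrapper already supplies that barrier pair. A
simplified sketch, close to (but not verbatim from) the cmpxchg()
definition in arch/mips/include/asm/cmpxchg.h:

  #define cmpxchg(ptr, old, new)                                  \
  ({                                                              \
          __typeof__(*(ptr)) __res;                               \
                                                                  \
          /* The barrier pair is emitted once, out here. */       \
          smp_mb__before_llsc();                                  \
          __res = cmpxchg_local((ptr), (old), (new));             \
          smp_llsc_mb();                                          \
                                                                  \
          __res;                                                  \
  })

With the barriers owned by the outer macro, the retry loops in
__xchg_small() and __cmpxchg_small() can use the unordered
cmpxchg_local() without weakening the ordering guarantees of the
user-visible primitives.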

Signed-off-by: Alexander Sverdlin <alexander.sverd...@nokia.com>
---
 arch/mips/kernel/cmpxchg.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/mips/kernel/cmpxchg.c b/arch/mips/kernel/cmpxchg.c
index 89107de..122e85f 100644
--- a/arch/mips/kernel/cmpxchg.c
+++ b/arch/mips/kernel/cmpxchg.c
@@ -41,7 +41,7 @@ unsigned long __xchg_small(volatile void *ptr, unsigned long val, unsigned int s
        do {
                old32 = load32;
                new32 = (load32 & ~mask) | (val << shift);
-               load32 = cmpxchg(ptr32, old32, new32);
+               load32 = cmpxchg_local(ptr32, old32, new32);
        } while (load32 != old32);
 
        return (load32 & mask) >> shift;
@@ -97,7 +97,7 @@ unsigned long __cmpxchg_small(volatile void *ptr, unsigned long old,
                 */
                old32 = (load32 & ~mask) | (old << shift);
                new32 = (load32 & ~mask) | (new << shift);
-               load32 = cmpxchg(ptr32, old32, new32);
+               load32 = cmpxchg_local(ptr32, old32, new32);
                if (load32 == old32)
                        return old;
        }
-- 
2.10.2
