On Fri, Mar 24, 2017 at 10:23:29PM +0100, Peter Zijlstra wrote:

> I'll try and redo the patches that landed in tip and see what it does
> for total vmlinux size somewhere tomorrow.

   text    data     bss     dec     hex filename
10726413        4540256  843776 16110445         f5d36d defconfig-build/vmlinux.pre
10730509        4540256  843776 16114541         f5e36d defconfig-build/vmlinux.post

:-(

---
 arch/x86/include/asm/atomic.h      | 18 ++++++-------
 arch/x86/include/asm/atomic64_64.h | 24 ++++++++++--------
 arch/x86/include/asm/cmpxchg.h     | 50 ++++++++++++++++++++----------------
 include/linux/atomic.h             | 52 +++++++++++++++++++++++++-------------
 lib/refcount.c                     | 35 ++++++++++++++-----------
 5 files changed, 104 insertions(+), 75 deletions(-)

diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index caa5798..c80d4914 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -186,11 +186,8 @@ static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new)
        return cmpxchg(&v->counter, old, new);
 }
 
-#define atomic_try_cmpxchg atomic_try_cmpxchg
-static __always_inline bool atomic_try_cmpxchg(atomic_t *v, int *old, int new)
-{
-       return try_cmpxchg(&v->counter, old, new);
-}
+#define atomic_try_cmpxchg(_ptr, _old, _new, _label)                   \
+       try_cmpxchg(&(_ptr)->counter, _old, _new, _label)
 
 static inline int atomic_xchg(atomic_t *v, int new)
 {
@@ -210,8 +207,9 @@ static inline void atomic_##op(int i, atomic_t *v)          \
 static inline int atomic_fetch_##op(int i, atomic_t *v)                        \
 {                                                                      \
        int val = atomic_read(v);                                       \
-       do {                                                            \
-       } while (!atomic_try_cmpxchg(v, &val, val c_op i));             \
+       for (;;)                                                        \
+               val = atomic_try_cmpxchg(v, val, val c_op i, success);  \
+success:                                                               \
        return val;                                                     \
 }
 
@@ -239,10 +237,12 @@ ATOMIC_OPS(xor, ^)
 static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u)
 {
        int c = atomic_read(v);
-       do {
+       for (;;) {
                if (unlikely(c == u))
                        break;
-       } while (!atomic_try_cmpxchg(v, &c, c + a));
+               c = atomic_try_cmpxchg(v, c, c + a, success);
+       }
+success:
        return c;
 }
 
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 6189a43..489c3e2 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -176,11 +176,8 @@ static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new)
        return cmpxchg(&v->counter, old, new);
 }
 
-#define atomic64_try_cmpxchg atomic64_try_cmpxchg
-static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, long *old, long new)
-{
-       return try_cmpxchg(&v->counter, old, new);
-}
+#define atomic64_try_cmpxchg(_ptr, _old, _new, _label)                 \
+       try_cmpxchg(&(_ptr)->counter, _old, _new, _label)
 
 static inline long atomic64_xchg(atomic64_t *v, long new)
 {
@@ -199,10 +196,12 @@ static inline long atomic64_xchg(atomic64_t *v, long new)
 static inline bool atomic64_add_unless(atomic64_t *v, long a, long u)
 {
        long c = atomic64_read(v);
-       do {
+       for (;;) {
                if (unlikely(c == u))
                        return false;
-       } while (!atomic64_try_cmpxchg(v, &c, c + a));
+               c = atomic64_try_cmpxchg(v, c, c + a, success);
+       }
+success:
        return true;
 }
 
@@ -218,11 +217,13 @@ static inline bool atomic64_add_unless(atomic64_t *v, long a, long u)
 static inline long atomic64_dec_if_positive(atomic64_t *v)
 {
        long dec, c = atomic64_read(v);
-       do {
+       for (;;) {
                dec = c - 1;
                if (unlikely(dec < 0))
                        break;
-       } while (!atomic64_try_cmpxchg(v, &c, dec));
+               c = atomic64_try_cmpxchg(v, c, dec, success);
+       }
+success:
        return dec;
 }
 
@@ -239,8 +240,9 @@ static inline void atomic64_##op(long i, atomic64_t *v)                     \
 static inline long atomic64_fetch_##op(long i, atomic64_t *v)          \
 {                                                                      \
        long val = atomic64_read(v);                                    \
-       do {                                                            \
-       } while (!atomic64_try_cmpxchg(v, &val, val c_op i));           \
+       for (;;)                                                        \
+               val = atomic64_try_cmpxchg(v, val, val c_op i, success);\
+success:                                                               \
        return val;                                                     \
 }
 
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index fb961db..e6b8a8f 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -154,22 +154,24 @@ extern void __add_wrong_size(void)
        __cmpxchg_local(ptr, old, new, sizeof(*(ptr)))
 
 
-#define __raw_try_cmpxchg(_ptr, _pold, _new, size, lock)               \
+#define __raw_try_cmpxchg(_ptr, _old, _new, success_label, size, lock) \
 ({                                                                     \
-       bool success;                                                   \
-       __typeof__(_ptr) _old = (_pold);                                \
-       __typeof__(*(_ptr)) __old = *_old;                              \
+       __typeof__(*(_ptr)) __old = (_old);                             \
        __typeof__(*(_ptr)) __new = (_new);                             \
+       __typeof__(*(_ptr)) __ret;                                      \
+       bool __success;                                                 \
+                                                                       \
        switch (size) {                                                 \
        case __X86_CASE_B:                                              \
        {                                                               \
                volatile u8 *__ptr = (volatile u8 *)(_ptr);             \
                asm volatile(lock "cmpxchgb %[new], %[ptr]"             \
                             CC_SET(z)                                  \
-                            : CC_OUT(z) (success),                     \
+                            : CC_OUT(z) (__success),                   \
                               [ptr] "+m" (*__ptr),                     \
-                              [old] "+a" (__old)                       \
-                            : [new] "q" (__new)                        \
+                              [old] "=a" (__ret)                       \
+                            : [new] "q" (__new),                       \
+                              "2" (__old)                              \
                             : "memory");                               \
                break;                                                  \
        }                                                               \
@@ -178,10 +180,11 @@ extern void __add_wrong_size(void)
                volatile u16 *__ptr = (volatile u16 *)(_ptr);           \
                asm volatile(lock "cmpxchgw %[new], %[ptr]"             \
                             CC_SET(z)                                  \
-                            : CC_OUT(z) (success),                     \
+                            : CC_OUT(z) (__success),                   \
                               [ptr] "+m" (*__ptr),                     \
-                              [old] "+a" (__old)                       \
-                            : [new] "r" (__new)                        \
+                              [old] "=a" (__ret)                       \
+                            : [new] "r" (__new),                       \
+                              "2" (__old)                              \
                             : "memory");                               \
                break;                                                  \
        }                                                               \
@@ -190,10 +193,11 @@ extern void __add_wrong_size(void)
                volatile u32 *__ptr = (volatile u32 *)(_ptr);           \
                asm volatile(lock "cmpxchgl %[new], %[ptr]"             \
                             CC_SET(z)                                  \
-                            : CC_OUT(z) (success),                     \
+                            : CC_OUT(z) (__success),                   \
                               [ptr] "+m" (*__ptr),                     \
-                              [old] "+a" (__old)                       \
-                            : [new] "r" (__new)                        \
+                              [old] "=a" (__ret)                       \
+                            : [new] "r" (__new),                       \
+                              "2" (__old)                              \
                             : "memory");                               \
                break;                                                  \
        }                                                               \
@@ -202,25 +206,27 @@ extern void __add_wrong_size(void)
                volatile u64 *__ptr = (volatile u64 *)(_ptr);           \
                asm volatile(lock "cmpxchgq %[new], %[ptr]"             \
                             CC_SET(z)                                  \
-                            : CC_OUT(z) (success),                     \
+                            : CC_OUT(z) (__success),                   \
                               [ptr] "+m" (*__ptr),                     \
-                              [old] "+a" (__old)                       \
-                            : [new] "r" (__new)                        \
+                              [old] "=a" (__ret)                       \
+                            : [new] "r" (__new),                       \
+                              "2" (__old)                              \
                             : "memory");                               \
                break;                                                  \
        }                                                               \
        default:                                                        \
                __cmpxchg_wrong_size();                                 \
        }                                                               \
-       *_old = __old;                                                  \
-       success;                                                        \
+                                                                       \
+       if (likely(__success)) goto success_label;                      \
+       __ret;                                                          \
 })
 
-#define __try_cmpxchg(ptr, pold, new, size)                            \
-       __raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX)
+#define __try_cmpxchg(ptr, pold, new, success_label, size)             \
+       __raw_try_cmpxchg((ptr), (pold), (new), success_label, (size), LOCK_PREFIX)
 
-#define try_cmpxchg(ptr, pold, new)                                    \
-       __try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr)))
+#define try_cmpxchg(ptr, pold, new, success_label)                     \
+       __try_cmpxchg((ptr), (pold), (new), success_label, sizeof(*(ptr)))
 
 /*
  * xadd() adds "inc" to "*ptr" and atomically returns the previous
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index aae5953..13a6eac 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -425,18 +425,26 @@
 
 #ifndef atomic_try_cmpxchg
 
-#define __atomic_try_cmpxchg(type, _p, _po, _n)                                \
+#define __atomic_try_cmpxchg(type, _p, _o, _n, _label)                 \
 ({                                                                     \
-       typeof(_po) __po = (_po);                                       \
-       typeof(*(_po)) __o = *__po;                                     \
-       *__po = atomic_cmpxchg##type((_p), __o, (_n));                  \
-       (*__po == __o);                                                 \
+       typeof(*(_p)) __r;                                              \
+       typeof(*(_p)) __o = (_o);                                       \
+       __r = atomic_cmpxchg##type((_p), __o, (_n));                    \
+       if (__r == __o) goto _label;                                    \
+       __r;                                                            \
 })
 
-#define atomic_try_cmpxchg(_p, _po, _n)                __atomic_try_cmpxchg(, _p, _po, _n)
-#define atomic_try_cmpxchg_relaxed(_p, _po, _n)        __atomic_try_cmpxchg(_relaxed, _p, _po, _n)
-#define atomic_try_cmpxchg_acquire(_p, _po, _n)        __atomic_try_cmpxchg(_acquire, _p, _po, _n)
-#define atomic_try_cmpxchg_release(_p, _po, _n)        __atomic_try_cmpxchg(_release, _p, _po, _n)
+#define atomic_try_cmpxchg(_p, _o, _n, _l)                             \
+       __atomic_try_cmpxchg(, _p, _o, _n, _l)
+
+#define atomic_try_cmpxchg_relaxed(_p, _o, _n, _l)                     \
+       __atomic_try_cmpxchg(_relaxed, _p, _o, _n, _l)
+
+#define atomic_try_cmpxchg_acquire(_p, _o, _n, _l)                     \
+       __atomic_try_cmpxchg(_acquire, _p, _o, _n, _l)
+
+#define atomic_try_cmpxchg_release(_p, _o, _n, _l)                     \
+       __atomic_try_cmpxchg(_release, _p, _o, _n, _l)
 
 #else /* atomic_try_cmpxchg */
 #define atomic_try_cmpxchg_relaxed     atomic_try_cmpxchg
@@ -1019,18 +1027,26 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 
 #ifndef atomic64_try_cmpxchg
 
-#define __atomic64_try_cmpxchg(type, _p, _po, _n)                      \
+#define __atomic64_try_cmpxchg(type, _p, _o, _n, _label)               \
 ({                                                                     \
-       typeof(_po) __po = (_po);                                       \
-       typeof(*(_po)) __o = *__po;                                     \
-       *__po = atomic64_cmpxchg##type((_p), __o, (_n));                \
-       (*__po == __o);                                                 \
+       typeof(*(_p)) __r;                                              \
+       typeof(*(_p)) __o = (_o);                                       \
+       __r = atomic64_cmpxchg##type((_p), __o, (_n));                  \
+       if (__r == __o) goto _label;                                    \
+       __r;                                                            \
 })
 
-#define atomic64_try_cmpxchg(_p, _po, _n)              __atomic64_try_cmpxchg(, _p, _po, _n)
-#define atomic64_try_cmpxchg_relaxed(_p, _po, _n)      __atomic64_try_cmpxchg(_relaxed, _p, _po, _n)
-#define atomic64_try_cmpxchg_acquire(_p, _po, _n)      __atomic64_try_cmpxchg(_acquire, _p, _po, _n)
-#define atomic64_try_cmpxchg_release(_p, _po, _n)      __atomic64_try_cmpxchg(_release, _p, _po, _n)
+#define atomic64_try_cmpxchg(_p, _o, _n, _l)                           \
+       __atomic64_try_cmpxchg(, _p, _o, _n, _l)
+
+#define atomic64_try_cmpxchg_relaxed(_p, _o, _n, _l)                   \
+       __atomic64_try_cmpxchg(_relaxed, _p, _o, _n, _l)
+
+#define atomic64_try_cmpxchg_acquire(_p, _o, _n, _l)                   \
+       __atomic64_try_cmpxchg(_acquire, _p, _o, _n, _l)
+
+#define atomic64_try_cmpxchg_release(_p, _o, _n, _l)                   \
+       __atomic64_try_cmpxchg(_release, _p, _o, _n, _l)
 
 #else /* atomic64_try_cmpxchg */
 #define atomic64_try_cmpxchg_relaxed   atomic64_try_cmpxchg
diff --git a/lib/refcount.c b/lib/refcount.c
index f42124c..18b8926 100644
--- a/lib/refcount.c
+++ b/lib/refcount.c
@@ -59,7 +59,7 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r)
 {
        unsigned int new, val = atomic_read(&r->refs);
 
-       do {
+       for (;;) {
                if (!val)
                        return false;
 
@@ -70,8 +70,9 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r)
                if (new < val)
                        new = UINT_MAX;
 
-       } while (!atomic_try_cmpxchg_relaxed(&r->refs, &val, new));
-
+               val = atomic_try_cmpxchg_relaxed(&r->refs, val, new, success);
+       }
+success:
        WARN_ONCE(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
 
        return true;
@@ -116,7 +117,7 @@ bool refcount_inc_not_zero(refcount_t *r)
 {
        unsigned int new, val = atomic_read(&r->refs);
 
-       do {
+       for (;;) {
                new = val + 1;
 
                if (!val)
@@ -125,8 +126,9 @@ bool refcount_inc_not_zero(refcount_t *r)
                if (unlikely(!new))
                        return true;
 
-       } while (!atomic_try_cmpxchg_relaxed(&r->refs, &val, new));
-
+               val = atomic_try_cmpxchg_relaxed(&r->refs, val, new, success);
+       }
+success:
        WARN_ONCE(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
 
        return true;
@@ -175,7 +177,7 @@ bool refcount_sub_and_test(unsigned int i, refcount_t *r)
 {
        unsigned int new, val = atomic_read(&r->refs);
 
-       do {
+       for (;;) {
                if (unlikely(val == UINT_MAX))
                        return false;
 
@@ -185,8 +187,9 @@ bool refcount_sub_and_test(unsigned int i, refcount_t *r)
                        return false;
                }
 
-       } while (!atomic_try_cmpxchg_release(&r->refs, &val, new));
-
+               val = atomic_try_cmpxchg_release(&r->refs, val, new, success);
+       }
+success:
        return !new;
 }
 EXPORT_SYMBOL_GPL(refcount_sub_and_test);
@@ -244,9 +247,10 @@ EXPORT_SYMBOL_GPL(refcount_dec);
  */
 bool refcount_dec_if_one(refcount_t *r)
 {
-       int val = 1;
-
-       return atomic_try_cmpxchg_release(&r->refs, &val, 0);
+       atomic_try_cmpxchg_release(&r->refs, 1, 0, success);
+       return false;
+success:
+       return true;
 }
 EXPORT_SYMBOL_GPL(refcount_dec_if_one);
 
@@ -265,7 +269,7 @@ bool refcount_dec_not_one(refcount_t *r)
 {
        unsigned int new, val = atomic_read(&r->refs);
 
-       do {
+       for (;;) {
                if (unlikely(val == UINT_MAX))
                        return true;
 
@@ -278,8 +282,9 @@ bool refcount_dec_not_one(refcount_t *r)
                        return true;
                }
 
-       } while (!atomic_try_cmpxchg_release(&r->refs, &val, new));
-
+               val = atomic_try_cmpxchg_release(&r->refs, val, new, success);
+       }
+success:
        return true;
 }
 EXPORT_SYMBOL_GPL(refcount_dec_not_one);

Reply via email to