On Tue, Apr 2, 2019 at 1:28 PM Alexander Potapenko <gli...@google.com> wrote:
>
> 1. Use memory clobber in bitops that touch arbitrary memory
>
> Certain bit operations that read/write bits take a base pointer and an
> arbitrarily large offset to address the bit relative to that base.
> Inline assembly constraints aren't expressive enough to tell the
> compiler that the assembly directive is going to touch a specific memory
> location of unknown size; therefore we have to use the "memory" clobber
> to indicate that the assembly is going to access memory locations other
> than those listed in the inputs/outputs.
> To indicate that BTR/BTS instructions don't necessarily touch the first
> sizeof(long) bytes of the argument, we also move the address to the
> assembly inputs.
>
> This particular change leads to a size increase in 124 kernel functions
> in a defconfig build. For some of them the diff is only in NOP
> operations; others end up re-reading values from memory and may
> potentially slow down execution. But without these clobbers the compiler
> is free to cache the contents of the bitmaps and use them as if they
> weren't changed by the inline assembly.
>
> 2. Use byte-sized arguments for operations touching single bytes.
>
> Passing a long value to ANDB/ORB/XORB instructions makes the compiler
> treat sizeof(long) bytes as being clobbered, which isn't the case. This
> may theoretically lead to worse code in the case of heavy optimization.
>
> Signed-off-by: Alexander Potapenko <gli...@google.com>
> Cc: Dmitry Vyukov <dvyu...@google.com>
> Cc: Paul E. McKenney <paul...@linux.ibm.com>
> Cc: H. Peter Anvin <h...@zytor.com>
> Cc: Peter Zijlstra <pet...@infradead.org>
> Cc: James Y Knight <jykni...@google.com>
> ---
> v2:
> -- renamed the patch
> -- addressed comment by Peter Zijlstra: don't use "+m" for functions
>    returning void
> -- fixed input types for operations touching single bytes
> ---
>  arch/x86/include/asm/bitops.h | 41 +++++++++++++++--------------------
>  1 file changed, 18 insertions(+), 23 deletions(-)
>
> diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
> index d153d570bb04..c0cb9c5d3476 100644
> --- a/arch/x86/include/asm/bitops.h
> +++ b/arch/x86/include/asm/bitops.h
> @@ -36,16 +36,17 @@
>   * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
>   */
>
> -#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
> +#define RLONG_ADDR(x) "m" (*(volatile long *) (x))
> +#define WBYTE_ADDR(x) "+m" (*(volatile char *) (x))
>
> -#define ADDR BITOP_ADDR(addr)
> +#define ADDR RLONG_ADDR(addr)
>
>  /*
>   * We do the locked ops that don't return the old value as
>   * a mask operation on a byte.
>   */
>  #define IS_IMMEDIATE(nr) (__builtin_constant_p(nr))
> -#define CONST_MASK_ADDR(nr, addr) BITOP_ADDR((void *)(addr) + ((nr)>>3))
> +#define CONST_MASK_ADDR(nr, addr) WBYTE_ADDR((void *)(addr) + ((nr)>>3))
>  #define CONST_MASK(nr) (1 << ((nr) & 7))
>
>  /**
> @@ -73,7 +74,7 @@ set_bit(long nr, volatile unsigned long *addr)
>                          : "memory");
>          } else {
>                  asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"
> -                        : BITOP_ADDR(addr) : "Ir" (nr) : "memory");
> +                        : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
>          }
>  }
>
>  /**
> @@ -88,7 +89,7 @@ set_bit(long nr, volatile unsigned long *addr)
>   */
>  static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
>  {
> -        asm volatile(__ASM_SIZE(bts) " %1,%0" : ADDR : "Ir" (nr) : "memory");
> +        asm volatile(__ASM_SIZE(bts) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
>  }
>
>  /**
> @@ -110,8 +111,7 @@ clear_bit(long nr, volatile unsigned long *addr)
>                          : "iq" ((u8)~CONST_MASK(nr)));
>          } else {
>                  asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0"
> -                        : BITOP_ADDR(addr)
> -                        : "Ir" (nr));
> +                        : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
>          }
>  }
>
> @@ -131,7 +131,7 @@ static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *ad
>
>  static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
>  {
> -        asm volatile(__ASM_SIZE(btr) " %1,%0" : ADDR : "Ir" (nr));
> +        asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
>  }
>
>  static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
> @@ -139,7 +139,7 @@ static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile
>          bool negative;
>          asm volatile(LOCK_PREFIX "andb %2,%1"
>                  CC_SET(s)
> -                : CC_OUT(s) (negative), ADDR
> +                : CC_OUT(s) (negative), WBYTE_ADDR(addr)
>                  : "ir" ((char) ~(1 << nr)) : "memory");
>          return negative;
>  }
> @@ -155,13 +155,9 @@ static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile
>   * __clear_bit() is non-atomic and implies release semantics before the memory
>   * operation. It can be used for an unlock if no other CPUs can concurrently
>   * modify other bits in the word.
> - *
> - * No memory barrier is required here, because x86 cannot reorder stores past
> - * older loads. Same principle as spin_unlock.
>   */
>  static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
>  {
> -        barrier();

I should have reflected the barrier() removal in the patch description.
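To spell out why dropping it is safe: barrier() is essentially just an empty
asm statement with a "memory" clobber, roughly

        #define barrier() __asm__ __volatile__("" ::: "memory")

and with this patch __clear_bit() itself ends in a "memory" clobber, so the
compiler-ordering guarantee that the explicit barrier() used to provide is
already implied by the call below.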
>          __clear_bit(nr, addr);
>  }
>
> @@ -176,7 +172,7 @@ static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *
>   */
>  static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
>  {
> -        asm volatile(__ASM_SIZE(btc) " %1,%0" : ADDR : "Ir" (nr));
> +        asm volatile(__ASM_SIZE(btc) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
>  }
>
>  /**
> @@ -196,8 +192,7 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)
>                          : "iq" ((u8)CONST_MASK(nr)));
>          } else {
>                  asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0"
> -                        : BITOP_ADDR(addr)
> -                        : "Ir" (nr));
> +                        : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
>          }
>  }
>
> @@ -242,8 +237,8 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
>
>          asm(__ASM_SIZE(bts) " %2,%1"
>              CC_SET(c)
> -            : CC_OUT(c) (oldbit), ADDR
> -            : "Ir" (nr));
> +            : CC_OUT(c) (oldbit)
> +            : ADDR, "Ir" (nr) : "memory");
>          return oldbit;
>  }
>
> @@ -282,8 +277,8 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long
>
>          asm volatile(__ASM_SIZE(btr) " %2,%1"
>                       CC_SET(c)
> -                     : CC_OUT(c) (oldbit), ADDR
> -                     : "Ir" (nr));
> +                     : CC_OUT(c) (oldbit)
> +                     : ADDR, "Ir" (nr) : "memory");
>          return oldbit;
>  }
>
> @@ -294,8 +289,8 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
>
>          asm volatile(__ASM_SIZE(btc) " %2,%1"
>                       CC_SET(c)
> -                     : CC_OUT(c) (oldbit), ADDR
> -                     : "Ir" (nr) : "memory");
> +                     : CC_OUT(c) (oldbit)
> +                     : ADDR, "Ir" (nr) : "memory");
>
>          return oldbit;
>  }
> @@ -326,7 +321,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
>          asm volatile(__ASM_SIZE(bt) " %2,%1"
>                       CC_SET(c)
>                       : CC_OUT(c) (oldbit)
> -                     : "m" (*(unsigned long *)addr), "Ir" (nr));
> +                     : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory");
>
>          return oldbit;
>  }
> --
> 2.21.0.392.gf8f6787159e-goog
>
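To make point 1 of the description a bit more concrete, here is a user-space
sketch. It is illustrative only: sketch_set_bit_old()/sketch_set_bit_new()/
demo() and the hard-coded btsq are stand-ins for the kernel's __ASM_SIZE()/
RLONG_ADDR() machinery, and it uses "r" instead of the kernel's "Ir" so that
the register form of BTS is emitted (the immediate form masks the bit offset
modulo the operand size and cannot reach past the first long).

        /*
         * Old-style constraints: the asm claims to read/write only the
         * first long at addr and lists no "memory" clobber.
         */
        static inline void sketch_set_bit_old(long nr, volatile unsigned long *addr)
        {
                asm volatile("btsq %1,%0"
                             : "+m" (*(volatile long *)addr)
                             : "r" (nr));
        }

        /*
         * New-style constraints, as in the patch: the address becomes a
         * plain input and the "memory" clobber tells the compiler that some
         * other location (here, the word nr/64 longs away) may be written.
         */
        static inline void sketch_set_bit_new(long nr, volatile unsigned long *addr)
        {
                asm volatile("btsq %1,%0"
                             : : "m" (*(volatile long *)addr), "r" (nr)
                             : "memory");
        }

        unsigned long bitmap[2];

        unsigned long demo(void)
        {
                unsigned long cached = bitmap[1];

                /* BTS with a register bit offset of 70 lands in bitmap[1]. */
                sketch_set_bit_old(70, bitmap);
                /*
                 * With the old constraints the compiler may assume bitmap[1]
                 * is unchanged and fold this into 2 * cached; the new
                 * constraints force a re-read.  Those forced re-reads are the
                 * size/performance cost mentioned in the description.
                 */
                return cached + bitmap[1];
        }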
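And for point 2, a similar sketch of the byte-sized form (again illustrative;
sketch_or_byte() is not code from the patch):

        /*
         * Describing the destination as a char tells the compiler that ORB
         * writes exactly one byte, instead of claiming that a whole
         * sizeof(long) region changes the way the old
         * "+m" (*(volatile long *)...) operand did.
         */
        static inline void sketch_or_byte(volatile void *byte_addr, unsigned char mask)
        {
                asm volatile("orb %1,%0"
                             : "+m" (*(volatile char *)byte_addr)
                             : "iq" (mask));
        }

The gain is purely in what the compiler is allowed to assume: the neighbouring
sizeof(long)-1 bytes are no longer considered rewritten, so values loaded from
them can stay in registers across the operation.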
--
Alexander Potapenko
Software Engineer

Google Germany GmbH
Erika-Mann-Straße, 33
80636 München

Managing directors: Paul Manicle, Halimah DeLaine Prado
Registration court and number: Hamburg, HRB 86891
Registered office: Hamburg