On Tue, 2014-05-27 at 20:07 +0200, Jakub Jelinek wrote:
> On Mon, May 26, 2014 at 09:25:37PM -0500, Peter Bergner wrote:
> > In one of my other posts, I asked should 32-bit ports even attempt
> > to use the 2 * word_size atomics.  What is the code doing such that
> > it wants to use a 2 * word_size atomic?  Is it as simple as commenting
> > that code out for 32-bit builds of the library or do we really have
> > to support that?
> 
> BTW, just checked and I don't see any 2 * word_size atomics in i686
> libasan.so.1 - no cmpxchg8b insns anywhere.  Thus I think it would
> be really nice if you could point out where exactly is the 64-bit
> atomic really needed (or gcc options + preprocessed source so that
> it can be investigated in a cross-compiler).

It's being called from basically two files:

[bergner@makalu-lp1 gcc-fsf-mainline-asan-debug]$ find . -name '*.o' | xargs nm -AC | grep sync_fetch_and_add_8
./powerpc64-linux/32/libsanitizer/sanitizer_common/.libs/sanitizer_allocator.o:         U __sync_fetch_and_add_8
./powerpc64-linux/32/libsanitizer/sanitizer_common/sanitizer_allocator.o:         U __sync_fetch_and_add_8
./powerpc64-linux/32/libsanitizer/asan/.libs/asan_allocator2.o:         U __sync_fetch_and_add_8
./powerpc64-linux/32/libsanitizer/asan/asan_allocator2.o:         U __sync_fetch_and_add_8


It seems to be an expansion of the atomic_load template in both object files:

00000000 <__sanitizer::atomic_uint64_t::Type __sanitizer::atomic_load<__sanitizer::atomic_uint64_t>(__sanitizer::atomic_uint64_t const volatile*, __sanitizer::memory_order)>:
   0:   94 21 ff d0     stwu    r1,-48(r1)
   4:   7c 08 02 a6     mflr    r0
   8:   90 01 00 34     stw     r0,52(r1)
   c:   93 c1 00 28     stw     r30,40(r1)
  10:   42 9f 00 05     bcl     20,4*cr7+so,14 <__sanitizer::atomic_uint64_t::Type __sanitizer::atomic_load<__sanitizer::atomic_uint64_t>(__sanitizer::atomic_uint64_t const volatile*, __sanitizer::memory_order)+0x14>
  14:   7f c8 02 a6     mflr    r30
  18:   3f de 00 00     addis   r30,r30,0
                        1a: R_PPC_REL16_HA      .got2+0x8006
  1c:   3b de 00 00     addi    r30,r30,0
                        1e: R_PPC_REL16_LO      .got2+0x800a
  20:   90 61 00 18     stw     r3,24(r1)
  24:   90 81 00 1c     stw     r4,28(r1)
  28:   81 21 00 18     lwz     r9,24(r1)
  2c:   38 a0 00 00     li      r5,0
  30:   38 c0 00 00     li      r6,0
  34:   7d 23 4b 78     mr      r3,r9
  38:   48 00 00 01     bl      38 <__sanitizer::atomic_uint64_t::Type __sanitizer::atomic_load<__sanitizer::atomic_uint64_t>(__sanitizer::atomic_uint64_t const volatile*, __sanitizer::memory_order)+0x38>
                        38: R_PPC_PLTREL24      __sync_fetch_and_add_8+0x8000
  3c:   90 61 00 20     stw     r3,32(r1)
  40:   90 81 00 24     stw     r4,36(r1)
  44:   c8 01 00 20     lfd     f0,32(r1)
  48:   d8 01 00 08     stfd    f0,8(r1)
  4c:   81 21 00 08     lwz     r9,8(r1)
  50:   81 41 00 0c     lwz     r10,12(r1)
  54:   7d 23 4b 78     mr      r3,r9
  58:   7d 44 53 78     mr      r4,r10
  5c:   80 01 00 34     lwz     r0,52(r1)
  60:   7c 08 03 a6     mtlr    r0
  64:   83 c1 00 28     lwz     r30,40(r1)
  68:   38 21 00 30     addi    r1,r1,48
  6c:   4e 80 00 20     blr


This template comes from the ./sanitizer_common/sanitizer_atomic_clang_other.h 
header file:

template<typename T>
INLINE typename T::Type atomic_load(
    const volatile T *a, memory_order mo) {
  DCHECK(mo & (memory_order_relaxed | memory_order_consume
      | memory_order_acquire | memory_order_seq_cst));
  DCHECK(!((uptr)a % sizeof(*a)));
  typename T::Type v;

  if (sizeof(*a) < 8 || sizeof(void*) == 8) {
    // Assume that aligned loads are atomic.
    if (mo == memory_order_relaxed) {
      v = a->val_dont_use;
    } else if (mo == memory_order_consume) {
      // Assume that processor respects data dependencies
      // (and that compiler won't break them).
      __asm__ __volatile__("" ::: "memory");
      v = a->val_dont_use;
      __asm__ __volatile__("" ::: "memory");
    } else if (mo == memory_order_acquire) {
      __asm__ __volatile__("" ::: "memory");
      v = a->val_dont_use;
      __sync_synchronize();
    } else {  // seq_cst
      // E.g. on POWER we need a hw fence even before the store.
      __sync_synchronize();
      v = a->val_dont_use;
      __sync_synchronize();
    }
  } else {
    // 64-bit load on 32-bit platform.
    // Gross, but simple and reliable.
    // Assume that it is not in read-only memory.
    v = __sync_fetch_and_add(
        const_cast<typename T::Type volatile *>(&a->val_dont_use), 0);
  }
  return v;
}

It seems x86 has its own version in sanitizer_atomic_clang_x86.h, which
may explain why i686 doesn't see this call?  It does:

  ...
  } else {
    // 64-bit load on 32-bit platform.
    __asm__ __volatile__(
        "movq %1, %%mm0;"  // Use mmx reg for 64-bit atomic moves
        "movq %%mm0, %0;"  // (ptr could be read-only)
        "emms;"            // Empty mmx state/Reset FP regs
        : "=m" (v)
        : "m" (a->val_dont_use)
        : // mark the FP stack and mmx registers as clobbered
          "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)",
#ifdef __MMX__
          "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
#endif  // #ifdef __MMX__
          "memory");


Peter



Reply via email to