With -fvisibility=hidden, gcc gets too smart and inlines atomic_set into tc_lock_lock. The optimizer can't tell that the asm in atomic_set is changing anything, because its input is a fixed pointer, not the volatile value it points too. So the x86_64 lock loop looks like this:
34: 48 c7 c0 01 00 00 00 mov $0x1,%rax 3b: 48 89 d9 mov %rbx,%rcx 3e: f0 48 87 01 lock xchg %rax,(%rcx) 42: 48 89 c2 mov %rax,%rdx 45: 0f 1f 00 nopl (%rax) 48: 48 85 d2 test %rdx,%rdx 4b: 75 fb jne 48 <tc_lock_lock+0x28> That is, on failure 4b jumps back to 48 forever. It's also not really correct to be using a 64-bit xchg, since the memory value is just int. The 32-bit x86 version gets a similar loop with the xchg lifted out. This patch greatly simplifies the asm, with its input "+m"(*val) ensuring that gcc knows we're using the volatile value, and a broad "memory" constraint so the lock can protect other data too. That loop is now: 11: b9 01 00 00 00 mov $0x1,%ecx 16: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1) 1d: 00 00 00 20: 89 ca mov %ecx,%edx 22: 87 13 xchg %edx,(%rbx) 24: 85 d2 test %edx,%edx 26: 75 f8 jne 20 <tc_lock_lock+0x20> Signed-off-by: Josh Stone <[email protected]> --- dyninstAPI_RT/src/RTthread-x86-64.c | 67 +++++-------------------------------- dyninstAPI_RT/src/RTthread-x86.c | 31 ++++++----------- 2 files changed, 19 insertions(+), 79 deletions(-) diff --git a/dyninstAPI_RT/src/RTthread-x86-64.c b/dyninstAPI_RT/src/RTthread-x86-64.c index ce1be36..c5ab2cb 100644 --- a/dyninstAPI_RT/src/RTthread-x86-64.c +++ b/dyninstAPI_RT/src/RTthread-x86-64.c @@ -30,60 +30,15 @@ #include "dyninstAPI_RT/src/RTthread.h" -long atomic_set(volatile int *val) +static int atomic_set(volatile int *val) { - long result = 0; -#if defined(MUTATEE_32) - __asm("movl $1,%%eax\n" - "movl %1,%%ecx\n" - "lock\n" - "xchgl %%eax, (%%ecx)\n" - "movl %%eax, %0\n" - : "=r" (result) - : "r" (val) - : "%eax", - "%ecx"); -#else - __asm("mov $1,%%rax\n" - "mov %1,%%rcx\n" - "lock\n" - "xchg %%rax, (%%rcx)\n" - "mov %%rax, %0\n" - : "=r" (result) - : "r" (val) - : "%rax", - "%rcx"); -#endif + int result = 1; + __asm__ __volatile__( + "xchgl %0, %1\n" + : "+r" (result), "+m" (*val) + :: "memory"); return !result; } -/* -#if 1 - __asm( - "movl $0,%%eax\n" - "movl $1,%%ebx\n" - "movl %1,%%ecx\n" - "lock\n" - "cmpxchgl %%ebx,(%%ecx)\n" - "setz %%al\n" - "movl %%eax,%0\n" - : "=r" (result) - : "r" (val) - : "%eax", "%ebx", "%ecx"); -#else - __asm( - "mov $0,%%rax\n" - "mov $1,%%rbx\n" - "mov %1,%%rcx\n" - "lock\n" - "cmpxchg %%rbx,(%%rcx)\n" - "setz %%al\n" - "mov %%rax,%0\n" - : "=r" (result) - : "r" (val) - : "%rax", "%rbx", "%rcx"); -#endif - return result; -*/ int tc_lock_lock(tc_lock_t *tc) { @@ -93,13 +48,9 @@ int tc_lock_lock(tc_lock_t *tc) if (me == tc->tid) return DYNINST_DEAD_LOCK; - while (1) { - int wasNotLocked = atomic_set(&tc->mutex); - if( wasNotLocked ) { - tc->tid = me; - break; - } - } + while (!atomic_set(&tc->mutex)); + + tc->tid = me; return 0; } diff --git a/dyninstAPI_RT/src/RTthread-x86.c b/dyninstAPI_RT/src/RTthread-x86.c index a4e8c9b..21e31ba 100644 --- a/dyninstAPI_RT/src/RTthread-x86.c +++ b/dyninstAPI_RT/src/RTthread-x86.c @@ -57,21 +57,14 @@ int atomic_set(volatile int *val) return result; } #else -int atomic_set(volatile int *val) +static int atomic_set(volatile int *val) { - int result; - __asm( - "movl $0,%%eax\n" - "movl $1,%%edx\n" - "movl %1,%%ecx\n" - "lock\n" - "cmpxchgl %%edx,(%%ecx)\n" - "setz %%al\n" - "movl %%eax,%0\n" - : "=r" (result) - : "r" (val) - : "%eax", "%edx", "%ecx"); - return result; + int result = 1; + __asm__ __volatile__( + "xchgl %0, %1\n" + : "+r" (result), "+m" (*val) + :: "memory"); + return !result; } #endif @@ -83,13 +76,9 @@ int tc_lock_lock(tc_lock_t *tc) if (me == tc->tid) return DYNINST_DEAD_LOCK; - while (1) { - if (tc->mutex == 0 && atomic_set(&tc->mutex)) - { - tc->tid = me; - break; - } - } + while (!atomic_set(&tc->mutex)); + + tc->tid = me; return 0; } -- 1.8.3.1 _______________________________________________ Dyninst-api mailing list [email protected] https://lists.cs.wisc.edu/mailman/listinfo/dyninst-api
