https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66867
--- Comment #1 from Sebastian Huber <sebastian.hu...@embedded-brains.de> ---
This problem is also present on x86. The deprecated
__sync_bool_compare_and_swap() produces better code.

#include <stdatomic.h>

void f(atomic_uint *a)
{
  unsigned int e = 0;
  atomic_compare_exchange_strong_explicit(a, &e, 1, memory_order_relaxed, memory_order_relaxed);
}

void g(unsigned int *a)
{
  __sync_bool_compare_and_swap(a, 0, 1);
}

        .file "cas.c"
        .section .text.unlikely,"ax",@progbits
.LCOLDB0:
        .text
.LHOTB0:
        .p2align 4,,15
        .globl f
        .type f, @function
f:
.LFB0:
        .cfi_startproc
        movl $1, %edx
        xorl %eax, %eax
        movl $0, -4(%rsp) <- Superfluous
        lock cmpxchgl %edx, (%rdi)
        ret
        .cfi_endproc
.LFE0:
        .size f, .-f
        .section .text.unlikely
.LCOLDE0:
        .text
.LHOTE0:
        .section .text.unlikely
.LCOLDB1:
        .text
.LHOTB1:
        .p2align 4,,15
        .globl g
        .type g, @function
g:
.LFB1:
        .cfi_startproc
        movl $1, %edx
        xorl %eax, %eax
        lock cmpxchgl %edx, (%rdi)
        ret
        .cfi_endproc
.LFE1:
        .size g, .-g
        .section .text.unlikely
.LCOLDE1:
        .text
.LHOTE1:
        .ident "GCC: (GNU) 6.0.0 20150717 (experimental)"
        .section .note.GNU-stack,"",@progbits