https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80004

            Bug ID: 80004
           Summary: non-atomic load moved to before atomic load with
                    std::memory_order_acquire
           Product: gcc
           Version: 6.3.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: rtl-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: fboranek at atlas dot cz
  Target Milestone: ---

The following code
(https://gist.github.com/fboranek/5c99f96a99f902f5cf9dc5d597ff8fe3) for 4
threads doesn't compute the correct result if it is compiled by gcc (Debian
6.3.0-6) 6.3.0 20170205. The code is compiled correctly by gcc 4.9.2 and 5.4.1.
It is also compiled correctly if the variable counter is not static but global.

This issue may be related to bug 78778, but bug 78778 appears in 5.1.0 while
this issue has been present since version 6.


// Plain (non-atomic) int that increment1() increments; `static` gives it
// internal linkage (the report notes the problem disappears when it is global).
static int counter {0};
// Atomic flag that increment1() sets via exchange(acquire) before touching
// `counter` and clears via store(release) afterwards.
static std::atomic<bool> flag {false};

// Increments `counter` once per iteration, `cycles` times in total, with each
// increment bracketed by an acquire exchange and a release store on `flag`.
void increment1(int cycles)
{
    for (int i=0; i < cycles; ++i)
    {
        // Busy-wait until exchange() returns false, i.e. until this call is
        // the one that changed `flag` from false to true.
        while (flag.exchange(true, std::memory_order_acquire));

        // Non-atomic read-modify-write, done only while `flag` is held true.
        // The read of `counter` must not be moved before the acquire above.
        ++counter;

        // Clear `flag` with release ordering so the updated `counter` is
        // visible to the next thread whose acquire exchange returns false.
        flag.store(false, std::memory_order_release);
    }
}

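// asm for gcc 5.4.1 (correct: counter is updated in memory by the addl at
// 40110f, inside the loop and after the xchg on flag)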
00000000004010f0 <_Z10increment1i>:
  4010f0:       31 c9                   xor    %ecx,%ecx
  4010f2:       85 ff                   test   %edi,%edi
  4010f4:       ba 01 00 00 00          mov    $0x1,%edx
  4010f9:       7e 26                   jle    401121 <_Z10increment1i+0x31>
  4010fb:       0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)
  401100:       89 d0                   mov    %edx,%eax
  401102:       86 05 ec 20 20 00       xchg   %al,0x2020ec(%rip)        #
6031f4 <_ZL4flag>
  401108:       84 c0                   test   %al,%al
  40110a:       75 f4                   jne    401100 <_Z10increment1i+0x10>
  40110c:       83 c1 01                add    $0x1,%ecx
  40110f:       83 05 e2 20 20 00 01    addl   $0x1,0x2020e2(%rip)        #
6031f8 <_ZL7counter>
  401116:       39 cf                   cmp    %ecx,%edi
  401118:       c6 05 d5 20 20 00 00    movb   $0x0,0x2020d5(%rip)        #
6031f4 <_ZL4flag>
  40111f:       75 df                   jne    401100 <_Z10increment1i+0x10>
  401121:       f3 c3                   repz retq 
  401123:       0f 1f 00                nopl   (%rax)
  401126:       66 2e 0f 1f 84 00 00    nopw   %cs:0x0(%rax,%rax,1)
  40112d:       00 00 00

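// asm for gcc 6.3.0 (wrong: counter is loaded once at 1420, before the loop and
// before the xchg on flag; the loop only increments and stores that register copy)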
0000000000001420 <_Z10increment1i>:
    1420:       8b 0d d2 1d 20 00       mov    0x201dd2(%rip),%ecx        #
2031f8 <_ZL7counter>
    1426:       85 ff                   test   %edi,%edi
    1428:       ba 01 00 00 00          mov    $0x1,%edx
    142d:       8d 34 0f                lea    (%rdi,%rcx,1),%esi
    1430:       7e 26                   jle    1458 <_Z10increment1i+0x38>
    1432:       66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)
    1438:       89 d0                   mov    %edx,%eax
    143a:       86 05 b4 1d 20 00       xchg   %al,0x201db4(%rip)        #
2031f4 <_ZL4flag>
    1440:       84 c0                   test   %al,%al
    1442:       75 f4                   jne    1438 <_Z10increment1i+0x18>
    1444:       83 c1 01                add    $0x1,%ecx
    1447:       39 ce                   cmp    %ecx,%esi
    1449:       89 0d a9 1d 20 00       mov    %ecx,0x201da9(%rip)        #
2031f8 <_ZL7counter>
    144f:       c6 05 9e 1d 20 00 00    movb   $0x0,0x201d9e(%rip)        #
2031f4 <_ZL4flag>
    1456:       75 e0                   jne    1438 <_Z10increment1i+0x18>
    1458:       f3 c3                   repz retq 
    145a:       66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)

Reply via email to