https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107456

            Bug ID: 107456
           Summary: std::atomic::fetch_xxx generate LOCK CMPXCHG instead
                    of simpler LOCK instructions
           Product: gcc
           Version: 12.2.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c++
          Assignee: unassigned at gcc dot gnu.org
          Reporter: marko.makela at mariadb dot com
  Target Milestone: ---

The code generation for several std::atomic::fetch_ operations is suboptimal on
IA-32 and AMD64.

Related clang issue: https://github.com/llvm/llvm-project/issues/58685

I verified this by compiling the following with "-c -O2" or "-c -O2 -m32 -march=i686":

#include <atomic>

// "void" functions generate the minimal IA-32 or AMD64 code
// Atomically adds b to a; the previous value returned by fetch_add is discarded.
void lock_add(std::atomic<uint32_t> &a, uint32_t b) { a.fetch_add(b); }
// Atomically subtracts b from a; the previous value returned by fetch_sub is discarded.
void lock_sub(std::atomic<uint32_t> &a, uint32_t b) { a.fetch_sub(b); }
// Atomically ORs b into a; the previous value returned by fetch_or is discarded.
void lock_or(std::atomic<uint32_t> &a, uint32_t b) { a.fetch_or(b); }
// Atomically ANDs a with b; the previous value returned by fetch_and is discarded.
void lock_and(std::atomic<uint32_t> &a, uint32_t b) { a.fetch_and(b); }
// Atomically XORs b into a; the previous value returned by fetch_xor is discarded.
void lock_xor(std::atomic<uint32_t> &a, uint32_t b) { a.fetch_xor(b); }
// clang++-15: "lock inc"; g++-12: "lock add"
// Atomically increments a by the constant 1; the previous value is discarded.
void lock_inc(std::atomic<uint32_t> &a) { a.fetch_add(1); }
// clang++-15: "lock dec"; g++-12: "lock sub"
// Atomically decrements a by the constant 1; the previous value is discarded.
void lock_dec(std::atomic<uint32_t> &a) { a.fetch_sub(1); }

// "lock add" degrades to lock xadd; add
// Atomically adds b to a and returns the resulting (post-addition) value:
// fetch_add yields the value held before the addition, and b is added back
// to it (with unsigned wraparound).
uint32_t lock_add_result(std::atomic<uint32_t> &a, uint32_t b)
{
  return b + a.fetch_add(b);
}

// "lock sub" degrades to neg; lock xadd; sub
// Atomically subtracts b from a and returns the resulting (post-subtraction)
// value: fetch_sub yields the value held before the subtraction, and b is
// subtracted from it again (with unsigned wraparound).
uint32_t lock_sub_result(std::atomic<uint32_t> &a, uint32_t b)
{
  return a.fetch_sub(b) - b;
}

// "lock or" degrades to lock cmpxchg
// Atomically ORs b into a and returns the resulting (post-OR) value:
// the previous value from fetch_or, ORed with b again.
uint32_t lock_or_or(std::atomic<uint32_t> &a, uint32_t b)
{
  return a.fetch_or(b) | b;
}

// "lock or; and" degrades to lock cmpxchg
// Atomically ORs b into a and returns the previous value of a with the
// bits of b cleared (i.e. only the bits of the old value outside b).
uint32_t lock_or_andneg(std::atomic<uint32_t> &a, uint32_t b)
{
  return a.fetch_or(b) & ~b;
}

// "lock and" degrades to lock cmpxchg
// Atomically ANDs a with b and returns the resulting (post-AND) value:
// the previous value from fetch_and, ANDed with b again.
uint32_t lock_and_and(std::atomic<uint32_t> &a, uint32_t b)
{
  return a.fetch_and(b) & b;
}

// "lock and; or" degrades to lock cmpxchg
// Atomically ANDs a with b and returns the previous value of a with every
// bit that is NOT set in b forced to 1.
uint32_t lock_and_orneg(std::atomic<uint32_t> &a, uint32_t b)
{
  return a.fetch_and(b) | ~b;
}

// "lock xor; or" degrades to lock cmpxchg
// Atomically XORs b into a and returns the previous value of a with the
// bits of b forced to 1.
uint32_t lock_xor_or(std::atomic<uint32_t> &a, uint32_t b)
{
  return a.fetch_xor(b) | b;
}

// "lock xor; and" degrades to lock cmpxchg
// Atomically XORs b into a and returns the previous value of a with the
// bits of b cleared.
uint32_t lock_xor_andneg(std::atomic<uint32_t> &a, uint32_t b)
{
  return a.fetch_xor(b) & ~b;
}

Reply via email to