efriedma created this revision.
efriedma added reviewers: rjmccall, nikic.
Herald added a project: All.
efriedma requested review of this revision.
Herald added a project: clang.

Performing a load before calling __cxa_guard_acquire is supposed to be an 
optimization, but it isn't much of one if we're just going to emit a call to 
__atomic_load_1 instead.  Instead, just skip the load, and let 
__cxa_guard_acquire do whatever it wants.

(In practice, on such targets, the C++ library is just built with threading 
turned off, so the result isn't actually threadsafe, but there's not really 
anything clang can do about that.)

The alternative here is that we try to define some ABI for threadsafe init that 
allows the speculative load without full atomics.  Almost any target without 
full atomics has a load that's s "atomic enough" for this purpose. But it's not 
clear how we emit an "atomic enough" load in LLVM IR, and there isn't any ABI 
document we can refer to.

Or I guess we could turn off -fthreadsafe-statics by default on Cortex-M0, but 
that seems like it would be surprising.

Fixes https://github.com/llvm/llvm-project/issues/58184


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D135628

Files:
  clang/lib/CodeGen/ItaniumCXXABI.cpp
  clang/test/CodeGenCXX/threadsafe-statics-no-atomic.cpp

Index: clang/test/CodeGenCXX/threadsafe-statics-no-atomic.cpp
===================================================================
--- /dev/null
+++ clang/test/CodeGenCXX/threadsafe-statics-no-atomic.cpp
@@ -0,0 +1,26 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -emit-llvm -triple=armv6m-eabi -o - %s | FileCheck %s
+
+int f();
+
+// CHECK-LABEL: @_Z1gv(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load atomic i8, ptr @_ZGVZ1gvE1a acquire, align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[TMP0]], 1
+// CHECK-NEXT:    [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP1]], 0
+// CHECK-NEXT:    br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF3:![0-9]+]]
+// CHECK:       init.check:
+// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__cxa_guard_acquire(ptr @_ZGVZ1gvE1a) #[[ATTR1:[0-9]+]]
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
+// CHECK-NEXT:    br i1 [[TOBOOL]], label [[INIT:%.*]], label [[INIT_END]]
+// CHECK:       init:
+// CHECK-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z1fv()
+// CHECK-NEXT:    store i32 [[CALL]], ptr @_ZZ1gvE1a, align 4
+// CHECK-NEXT:    call void @__cxa_guard_release(ptr @_ZGVZ1gvE1a) #[[ATTR1]]
+// CHECK-NEXT:    br label [[INIT_END]]
+// CHECK:       init.end:
+// CHECK-NEXT:    ret void
+//
+void g() {
+  static int a = f();
+}
Index: clang/lib/CodeGen/ItaniumCXXABI.cpp
===================================================================
--- clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -2428,54 +2428,61 @@
   //         __cxa_guard_release (&obj_guard);
   //       }
   //     }
-
-  // Load the first byte of the guard variable.
-  llvm::LoadInst *LI =
-      Builder.CreateLoad(Builder.CreateElementBitCast(guardAddr, CGM.Int8Ty));
-
-  // Itanium ABI:
-  //   An implementation supporting thread-safety on multiprocessor
-  //   systems must also guarantee that references to the initialized
-  //   object do not occur before the load of the initialization flag.
   //
-  // In LLVM, we do this by marking the load Acquire.
-  if (threadsafe)
-    LI->setAtomic(llvm::AtomicOrdering::Acquire);
+  // If threadsafe statics are enabled, but we don't have inline atomics, just
+  // call __cxa_guard_acquire unconditionally.  The "inline" check isn't
+  // actually inline, and the user might not expect calls to __atomic libcalls.
 
-  // For ARM, we should only check the first bit, rather than the entire byte:
-  //
-  // ARM C++ ABI 3.2.3.1:
-  //   To support the potential use of initialization guard variables
-  //   as semaphores that are the target of ARM SWP and LDREX/STREX
-  //   synchronizing instructions we define a static initialization
-  //   guard variable to be a 4-byte aligned, 4-byte word with the
-  //   following inline access protocol.
-  //     #define INITIALIZED 1
-  //     if ((obj_guard & INITIALIZED) != INITIALIZED) {
-  //       if (__cxa_guard_acquire(&obj_guard))
-  //         ...
-  //     }
-  //
-  // and similarly for ARM64:
-  //
-  // ARM64 C++ ABI 3.2.2:
-  //   This ABI instead only specifies the value bit 0 of the static guard
-  //   variable; all other bits are platform defined. Bit 0 shall be 0 when the
-  //   variable is not initialized and 1 when it is.
-  llvm::Value *V =
-      (UseARMGuardVarABI && !useInt8GuardVariable)
-          ? Builder.CreateAnd(LI, llvm::ConstantInt::get(CGM.Int8Ty, 1))
-          : LI;
-  llvm::Value *NeedsInit = Builder.CreateIsNull(V, "guard.uninitialized");
-
-  llvm::BasicBlock *InitCheckBlock = CGF.createBasicBlock("init.check");
+  unsigned MaxInlineWidthInBits = CGF.getTarget().getMaxAtomicInlineWidth();
   llvm::BasicBlock *EndBlock = CGF.createBasicBlock("init.end");
-
-  // Check if the first byte of the guard variable is zero.
-  CGF.EmitCXXGuardedInitBranch(NeedsInit, InitCheckBlock, EndBlock,
-                               CodeGenFunction::GuardKind::VariableGuard, &D);
-
-  CGF.EmitBlock(InitCheckBlock);
+  if (!threadsafe || MaxInlineWidthInBits) {
+    // Load the first byte of the guard variable.
+    llvm::LoadInst *LI =
+        Builder.CreateLoad(Builder.CreateElementBitCast(guardAddr, CGM.Int8Ty));
+
+    // Itanium ABI:
+    //   An implementation supporting thread-safety on multiprocessor
+    //   systems must also guarantee that references to the initialized
+    //   object do not occur before the load of the initialization flag.
+    //
+    // In LLVM, we do this by marking the load Acquire.
+    if (threadsafe)
+      LI->setAtomic(llvm::AtomicOrdering::Acquire);
+
+    // For ARM, we should only check the first bit, rather than the entire byte:
+    //
+    // ARM C++ ABI 3.2.3.1:
+    //   To support the potential use of initialization guard variables
+    //   as semaphores that are the target of ARM SWP and LDREX/STREX
+    //   synchronizing instructions we define a static initialization
+    //   guard variable to be a 4-byte aligned, 4-byte word with the
+    //   following inline access protocol.
+    //     #define INITIALIZED 1
+    //     if ((obj_guard & INITIALIZED) != INITIALIZED) {
+    //       if (__cxa_guard_acquire(&obj_guard))
+    //         ...
+    //     }
+    //
+    // and similarly for ARM64:
+    //
+    // ARM64 C++ ABI 3.2.2:
+    //   This ABI instead only specifies the value bit 0 of the static guard
+    //   variable; all other bits are platform defined. Bit 0 shall be 0 when the
+    //   variable is not initialized and 1 when it is.
+    llvm::Value *V =
+        (UseARMGuardVarABI && !useInt8GuardVariable)
+            ? Builder.CreateAnd(LI, llvm::ConstantInt::get(CGM.Int8Ty, 1))
+            : LI;
+    llvm::Value *NeedsInit = Builder.CreateIsNull(V, "guard.uninitialized");
+
+    llvm::BasicBlock *InitCheckBlock = CGF.createBasicBlock("init.check");
+
+    // Check if the first byte of the guard variable is zero.
+    CGF.EmitCXXGuardedInitBranch(NeedsInit, InitCheckBlock, EndBlock,
+                                 CodeGenFunction::GuardKind::VariableGuard, &D);
+
+    CGF.EmitBlock(InitCheckBlock);
+  }
 
   // Variables used when coping with thread-safe statics and exceptions.
   if (threadsafe) {
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to