Prince781 updated this revision to Diff 215709.
Prince781 added a comment.

Use range-based version of llvm::sort


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D66122/new/

https://reviews.llvm.org/D66122

Files:
  clang/lib/CodeGen/CGExpr.cpp
  clang/lib/CodeGen/CodeGenFunction.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/test/CodeGenCXX/cxx11-thread-local.cpp

Index: clang/test/CodeGenCXX/cxx11-thread-local.cpp
===================================================================
--- clang/test/CodeGenCXX/cxx11-thread-local.cpp
+++ clang/test/CodeGenCXX/cxx11-thread-local.cpp
@@ -268,6 +268,33 @@
   return this->n;
 }
 
+namespace static_tls_in_lambda {
+  struct X {
+    X() {}
+  };
+
+
+  X (*f())() {
+    static thread_local X x;
+
+    return [] { return x; };
+  }
+
+  auto y = f();
+
+  void g() { y(); }
+
+  void bar(X**, X**, X**);
+  void baz(void());
+  void f2() {
+      thread_local X x;
+      thread_local X* p = &x;
+      thread_local X* q = p;
+      thread_local X* r = q;
+      baz([]{bar(&p, &q, &r);});
+  }
+}
+
 namespace {
 thread_local int anon_i{1};
 }
@@ -303,6 +330,42 @@
 // CHECK: store i64 1, i64* @_ZGVN1XIiE1mE
 // CHECK: br label
 
+// CHECK: define internal void @"_ZZN20static_tls_in_lambda1fEvENK3$_1clEv"
+// CHECK: %[[static_tls_guard_val:.*]] = load i8, i8* @_ZGVZN20static_tls_in_lambda1fEvE1x
+// CHECK: %[[static_tls_guard_init:.*]] = icmp eq i8 %[[static_tls_guard_val]], 0
+// CHECK: br i1 %[[static_tls_guard_init]],
+// CHECK: call void @_ZN20static_tls_in_lambda1XC1Ev(%"struct.static_tls_in_lambda::X"* @_ZZN20static_tls_in_lambda1fEvE1x)
+// CHECK: store i8 1, i8* @_ZGVZN20static_tls_in_lambda1fEvE1x, align 1
+
+// CHECK: define internal void @"_ZZN20static_tls_in_lambda2f2EvENK3$_2clEv"
+// init x
+// CHECK: %[[static_tls_guard_val:.*]] = load i8, i8* @_ZGVZN20static_tls_in_lambda2f2EvE1x
+// CHECK: %[[static_tls_guard_init:.*]] = icmp eq i8 %[[static_tls_guard_val]], 0
+// CHECK: br i1 %[[static_tls_guard_init]],
+// CHECK: call void @_ZN20static_tls_in_lambda1XC1Ev(%"struct.static_tls_in_lambda::X"* @_ZZN20static_tls_in_lambda2f2EvE1x)
+// CHECK: store i8 1, i8* @_ZGVZN20static_tls_in_lambda2f2EvE1x, align 1
+// init p
+// CHECK: %[[static_tls_guard_val:.*]] = load i8, i8* @_ZGVZN20static_tls_in_lambda2f2EvE1p
+// CHECK: %[[static_tls_guard_init:.*]] = icmp eq i8 %[[static_tls_guard_val]], 0
+// CHECK: br i1 %[[static_tls_guard_init]],
+// CHECK: store %"struct.static_tls_in_lambda::X"* @_ZZN20static_tls_in_lambda2f2EvE1x, %"struct.static_tls_in_lambda::X"** @_ZZN20static_tls_in_lambda2f2EvE1p
+// CHECK: store i8 1, i8* @_ZGVZN20static_tls_in_lambda2f2EvE1p, align 1
+// init q
+// CHECK: %[[static_tls_guard_val:.*]] = load i8, i8* @_ZGVZN20static_tls_in_lambda2f2EvE1q
+// CHECK: %[[static_tls_guard_init:.*]] = icmp eq i8 %[[static_tls_guard_val]], 0
+// CHECK: br i1 %[[static_tls_guard_init]],
+// CHECK: %[[static_tls_var_prev:.*]] = load %"struct.static_tls_in_lambda::X"*, %"struct.static_tls_in_lambda::X"** @_ZZN20static_tls_in_lambda2f2EvE1p, align 8
+// CHECK: store %"struct.static_tls_in_lambda::X"* %[[static_tls_var_prev]], %"struct.static_tls_in_lambda::X"** @_ZZN20static_tls_in_lambda2f2EvE1q, align 8
+// CHECK: store i8 1, i8* @_ZGVZN20static_tls_in_lambda2f2EvE1q, align 1
+// init r
+// CHECK: %[[static_tls_guard_val:.*]] = load i8, i8* @_ZGVZN20static_tls_in_lambda2f2EvE1r
+// CHECK: %[[static_tls_guard_init:.*]] = icmp eq i8 %[[static_tls_guard_val]], 0
+// CHECK: br i1 %[[static_tls_guard_init]],
+// CHECK: %[[static_tls_var_prev:.*]] = load %"struct.static_tls_in_lambda::X"*, %"struct.static_tls_in_lambda::X"** @_ZZN20static_tls_in_lambda2f2EvE1q, align 8
+// CHECK: store %"struct.static_tls_in_lambda::X"* %[[static_tls_var_prev]], %"struct.static_tls_in_lambda::X"** @_ZZN20static_tls_in_lambda2f2EvE1r, align 8
+// CHECK: store i8 1, i8* @_ZGVZN20static_tls_in_lambda2f2EvE1r, align 1
+
+
 // CHECK: define {{.*}}@[[GLOBAL_INIT:.*]]()
 // CHECK: call void @[[C_INIT]]()
 // CHECK: call void @[[E_INIT]]()
Index: clang/lib/CodeGen/CodeGenFunction.h
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.h
+++ clang/lib/CodeGen/CodeGenFunction.h
@@ -467,6 +467,10 @@
   /// should emit cleanups.
   bool CurFuncIsThunk = false;
 
+  /// Static thread-local variables referenced by this function that were
+  /// declared in a parent function.
+  llvm::SmallSet<const VarDecl *, 32> ForeignStaticTLSVars;
+
   /// In ARC, whether we should autorelease the return value.
   bool AutoreleaseResult = false;
 
Index: clang/lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.cpp
+++ clang/lib/CodeGen/CodeGenFunction.cpp
@@ -31,6 +31,7 @@
 #include "clang/Basic/TargetInfo.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
 #include "clang/Frontend/FrontendDiagnostic.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Intrinsics.h"
@@ -295,6 +296,37 @@
   delete BB;
 }
 
+/// Collect every VarDecl referenced by VD's initializer, directly or through
+/// the initializers of those variables; the set is empty if VD has none.
+static llvm::SmallSet<const VarDecl *, 32> 
+enumerateVarInitDependencies(const VarDecl *VD) {
+  llvm::SmallSet<const VarDecl *, 32> deps;
+
+  if (const auto *InitExpr = VD->getInit()) {
+    std::deque<const Stmt *> frontier;
+
+    for (const auto *s : InitExpr->children()) // root node itself is not tested
+      frontier.emplace_back(s);
+
+    while (!frontier.empty()) {
+      auto x = frontier.front();
+      frontier.pop_front(); // FIFO, so the walk is breadth-first
+      if (x->getStmtClass() == clang::Stmt::DeclRefExprClass) {
+        if (const auto *V = dyn_cast<VarDecl>(cast<DeclRefExpr>(x)->getDecl())) {
+          deps.insert(V);
+          auto V_Refs = enumerateVarInitDependencies(V); // no cycle guard
+          deps.insert(V_Refs.begin(), V_Refs.end());
+        }
+      } else {
+        for (const auto *s : x->children())
+          frontier.emplace_back(s);
+      }
+    }
+  }
+
+  return deps;
+}
+
 void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
   assert(BreakContinueStack.empty() &&
          "mismatched push/pop in break/continue stack!");
@@ -384,6 +416,97 @@
     CGBuilderTy(*this, AllocaInsertPt).CreateCall(FrameEscapeFn, EscapeArgs);
   }
 
+  // Emit initializers for static local variables referenced here but declared
+  // in another function: they may still be uninitialized on entry if this
+  // function executes on a separate thread.  For example, when we're
+  // emitting the lambda in the following code:
+  // 
+  // class Object {
+  //   int init;
+  //   Object() : init(1) {}
+  // };
+  //
+  // main() {
+  //    static thread_local Object var;
+  //    std::thread([] {
+  //        ...emit initializer for var here...
+  //    });
+  // }
+  // 
+  // or another example:
+  //
+  // main() {
+  //    static Object var;
+  //    #pragma omp threadprivate(var)
+  //    #pragma omp parallel
+  //    {
+  //        ...emit initializer for var here...
+  //    }
+  // }
+  llvm::SmallSet<const VarDecl *, 32> UniqueVarsToInit;
+  llvm::SmallVector<const VarDecl *, 32> OrderedVarInits;
+  llvm::DenseMap<const VarDecl *, llvm::SmallSet<const VarDecl *, 4>> VarInitDependencies;
+
+  for (const VarDecl *VD : ForeignStaticTLSVars) {
+    llvm::SmallVector<const VarDecl *, 4> Frontier = {VD};
+
+    // Skip CUDA device-side __shared__ variables (and their dependencies).
+    bool isCudaSharedVar = getLangOpts().CUDA && getLangOpts().CUDAIsDevice &&
+      VD->hasAttr<CUDASharedAttr>();
+    while (!isCudaSharedVar && !Frontier.empty()) {
+      auto *Child = Frontier.pop_back_val();
+
+      if (UniqueVarsToInit.find(Child) == UniqueVarsToInit.end()) {
+        for (auto *N : enumerateVarInitDependencies(Child)) {
+          Frontier.push_back(N);
+          VarInitDependencies[Child].insert(N);
+        }
+        UniqueVarsToInit.insert(Child);
+      }
+    }
+  }
+
+  for (const VarDecl *VD : UniqueVarsToInit)
+      OrderedVarInits.push_back(VD);
+
+  llvm::sort(OrderedVarInits,
+      [&VarInitDependencies](const VarDecl *a, const VarDecl *b) {
+        auto a_deps = VarInitDependencies[a];
+        // A < B iff B is in VarInitDependencies(A).  The order is reversed
+        // because our codegen reverses the order of initializers.
+        // NOTE(review): a partial order, not a strict weak ordering -- confirm.
+        return a_deps.find(b) != a_deps.end();
+      });
+
+  for (const VarDecl *VD : OrderedVarInits) {
+    // CUDA's local and local static __shared__ variables should not
+    // have any non-empty initializers. This is ensured by Sema.
+    // Whatever initializer such variable may have when it gets here is
+    // a no-op and should not be emitted.
+    bool isCudaSharedVar = getLangOpts().CUDA && getLangOpts().CUDAIsDevice &&
+      VD->hasAttr<CUDASharedAttr>();
+    // If this variable has an initializer and isn't CUDA __shared__, emit it.
+    if (VD->getInit() && !isCudaSharedVar) {
+      auto *GV = dyn_cast<llvm::GlobalVariable>(CGM.getStaticLocalDeclAddress(VD));
+      auto IP = Builder.saveAndClearIP();
+      llvm::BasicBlock *BBParent = AllocaInsertPt->getParent();
+      llvm::Instruction *INext = AllocaInsertPt->getNextNonDebugInstruction();
+      llvm::BasicBlock *BBNext = BBParent->splitBasicBlock(INext, BBParent->getName() + ".next");
+
+      INext = AllocaInsertPt->getNextNonDebugInstruction();
+
+      Builder.SetInsertPoint(BBParent);
+      // the global variable shouldn't change, as this function should've
+      // been called first when generating the parent function
+      AddInitializerToStaticVarDecl(*VD, GV);
+      if (INext != BBParent->getTerminator()) {
+        INext->eraseFromParent();
+        Builder.CreateBr(BBNext);
+      }
+      Builder.restoreIP(IP);
+    }
+  }
+
   // Remove the AllocaInsertPt instruction, which is just a convenience for us.
   llvm::Instruction *Ptr = AllocaInsertPt;
   AllocaInsertPt = nullptr;
Index: clang/lib/CodeGen/CGExpr.cpp
===================================================================
--- clang/lib/CodeGen/CGExpr.cpp
+++ clang/lib/CodeGen/CGExpr.cpp
@@ -2614,7 +2614,12 @@
           *VD, CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false)),
                      getContext().getDeclAlign(VD));
 
-    // No other cases for now.
+      // Add to ForeignStaticTLSVars if this is a thread-local variable
+      // declared in a different function.
+      const Decl *DC = cast<Decl>(VD->getDeclContext());
+      if (DC != CurGD.getDecl() && VD->getTLSKind() == VarDecl::TLS_Dynamic)
+        ForeignStaticTLSVars.insert(VD);
+     // No other cases for now.
     } else {
       llvm_unreachable("DeclRefExpr for Decl not entered in LocalDeclMap?");
     }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to