This revision was automatically updated to reflect the committed changes.
Closed by commit rG8a56d64d7620: [OpenMP][IRBuilder] Perform finalization 
(incl. outlining) late (authored by jdoerfert).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D74372/new/

https://reviews.llvm.org/D74372

Files:
  clang/lib/CodeGen/CodeGenFunction.cpp
  llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
  llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
  llvm/lib/Transforms/Utils/CodeExtractor.cpp
  llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp

Index: llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
===================================================================
--- llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -96,7 +96,7 @@
   EXPECT_EQ(cast<CallInst>(Barrier)->getArgOperand(1), GTID);
 
   Builder.CreateUnreachable();
-  EXPECT_FALSE(verifyModule(*M));
+  EXPECT_FALSE(verifyModule(*M, &errs()));
 }
 
 TEST_F(OpenMPIRBuilderTest, CreateCancel) {
@@ -151,7 +151,7 @@
   OMPBuilder.popFinalizationCB();
 
   Builder.CreateUnreachable();
-  EXPECT_FALSE(verifyModule(*M));
+  EXPECT_FALSE(verifyModule(*M, &errs()));
 }
 
 TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) {
@@ -212,7 +212,7 @@
   OMPBuilder.popFinalizationCB();
 
   Builder.CreateUnreachable();
-  EXPECT_FALSE(verifyModule(*M));
+  EXPECT_FALSE(verifyModule(*M, &errs()));
 }
 
 TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) {
@@ -267,7 +267,7 @@
   OMPBuilder.popFinalizationCB();
 
   Builder.CreateUnreachable();
-  EXPECT_FALSE(verifyModule(*M));
+  EXPECT_FALSE(verifyModule(*M, &errs()));
 }
 
 TEST_F(OpenMPIRBuilderTest, DbgLoc) {
@@ -362,9 +362,9 @@
 
   auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };
 
-  IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel(
-      Loc, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false);
-
+  IRBuilder<>::InsertPoint AfterIP =
+      OMPBuilder.CreateParallel(Loc, BodyGenCB, PrivCB, FiniCB, nullptr,
+                                nullptr, OMP_PROC_BIND_default, false);
   EXPECT_EQ(NumBodiesGenerated, 1U);
   EXPECT_EQ(NumPrivatizedVars, 1U);
   EXPECT_EQ(NumFinalizationPoints, 1U);
@@ -372,10 +372,12 @@
   Builder.restoreIP(AfterIP);
   Builder.CreateRetVoid();
 
+  OMPBuilder.finalize();
+
   EXPECT_NE(PrivAI, nullptr);
   Function *OutlinedFn = PrivAI->getFunction();
   EXPECT_NE(F, OutlinedFn);
-  EXPECT_FALSE(verifyModule(*M));
+  EXPECT_FALSE(verifyModule(*M, &errs()));
   EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoUnwind));
   EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoRecurse));
   EXPECT_TRUE(OutlinedFn->hasParamAttribute(0, Attribute::NoAlias));
@@ -470,6 +472,7 @@
 
   Builder.restoreIP(AfterIP);
   Builder.CreateRetVoid();
+  OMPBuilder.finalize();
 
   EXPECT_NE(PrivAI, nullptr);
   Function *OutlinedFn = PrivAI->getFunction();
@@ -595,6 +598,7 @@
 
   Builder.restoreIP(AfterIP);
   Builder.CreateRetVoid();
+  OMPBuilder.finalize();
 
   EXPECT_FALSE(verifyModule(*M, &errs()));
 
Index: llvm/lib/Transforms/Utils/CodeExtractor.cpp
===================================================================
--- llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -1405,11 +1405,7 @@
   DISubprogram *OldSP = OldFunc.getSubprogram();
   LLVMContext &Ctx = OldFunc.getContext();
 
-  // See llvm.org/PR44560, OpenMP passes an invalid subprogram to CodeExtractor.
-  bool NeedWorkaroundForOpenMPIRBuilderBug =
-      OldSP && OldSP->getRetainedNodes()->isTemporary();
-
-  if (!OldSP || NeedWorkaroundForOpenMPIRBuilderBug) {
+  if (!OldSP) {
     // Erase any debug info the new function contains.
     stripDebugInfo(NewFunc);
     // Make sure the old function doesn't contain any non-local metadata refs.
Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
===================================================================
--- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -93,6 +93,55 @@
 
 void OpenMPIRBuilder::initialize() { initializeTypes(M); }
 
+void OpenMPIRBuilder::finalize() {
+  for (OutlineInfo &OI : OutlineInfos) {
+    assert(!OI.Blocks.empty() &&
+           "Outlined regions should have at least a single block!");
+    BasicBlock *RegEntryBB = OI.Blocks.front();
+    Function *OuterFn = RegEntryBB->getParent();
+    CodeExtractorAnalysisCache CEAC(*OuterFn);
+    CodeExtractor Extractor(OI.Blocks, /* DominatorTree */ nullptr,
+                            /* AggregateArgs */ false,
+                            /* BlockFrequencyInfo */ nullptr,
+                            /* BranchProbabilityInfo */ nullptr,
+                            /* AssumptionCache */ nullptr,
+                            /* AllowVarArgs */ true,
+                            /* AllowAlloca */ true,
+                            /* Suffix */ ".omp_par");
+
+    LLVM_DEBUG(dbgs() << "Before     outlining: " << *OuterFn << "\n");
+
+    Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
+
+    LLVM_DEBUG(dbgs() << "After      outlining: " << *OuterFn << "\n");
+    LLVM_DEBUG(dbgs() << "   Outlined function: " << *OutlinedFn << "\n");
+
+    // For compability with the clang CG we move the outlined function after the
+    // one with the parallel region.
+    OutlinedFn->removeFromParent();
+    M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);
+
+    // Remove the artificial entry introduced by the extractor right away, we
+    // made our own entry block after all.
+    {
+      BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
+      assert(ArtificialEntry.getUniqueSuccessor() == RegEntryBB);
+      assert(RegEntryBB->getUniquePredecessor() == &ArtificialEntry);
+      RegEntryBB->moveBefore(&ArtificialEntry);
+      ArtificialEntry.eraseFromParent();
+    }
+    assert(&OutlinedFn->getEntryBlock() == RegEntryBB);
+    assert(OutlinedFn && OutlinedFn->getNumUses() == 1);
+
+    // Run a user callback, e.g. to add attributes.
+    if (OI.PostOutlineCB)
+      OI.PostOutlineCB(*OutlinedFn);
+  }
+
+  // Allow finalize to be called multiple times.
+  OutlineInfos.clear();
+}
+
 Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
                                          IdentFlag LocFlags) {
   // Enable "C-mode".
@@ -415,17 +464,18 @@
   // PRegionExitBB          <- A common exit to simplify block collection.
   //
 
-  LLVM_DEBUG(dbgs() << "Before body codegen: " << *UI->getFunction() << "\n");
+  LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n");
 
   // Let the caller create the body.
   assert(BodyGenCB && "Expected body generation callback!");
   InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
   BodyGenCB(AllocaIP, CodeGenIP, *PRegPreFiniBB);
 
-  LLVM_DEBUG(dbgs() << "After  body codegen: " << *UI->getFunction() << "\n");
+  LLVM_DEBUG(dbgs() << "After  body codegen: " << *OuterFn << "\n");
 
+  OutlineInfo OI;
   SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
-  SmallVector<BasicBlock *, 32> ParallelRegionBlocks, Worklist;
+  SmallVector<BasicBlock *, 32> Worklist;
   ParallelRegionBlockSet.insert(PRegEntryBB);
   ParallelRegionBlockSet.insert(PRegExitBB);
 
@@ -433,14 +483,14 @@
   Worklist.push_back(PRegEntryBB);
   while (!Worklist.empty()) {
     BasicBlock *BB = Worklist.pop_back_val();
-    ParallelRegionBlocks.push_back(BB);
+    OI.Blocks.push_back(BB);
     for (BasicBlock *SuccBB : successors(BB))
       if (ParallelRegionBlockSet.insert(SuccBB).second)
         Worklist.push_back(SuccBB);
   }
 
   CodeExtractorAnalysisCache CEAC(*OuterFn);
-  CodeExtractor Extractor(ParallelRegionBlocks, /* DominatorTree */ nullptr,
+  CodeExtractor Extractor(OI.Blocks, /* DominatorTree */ nullptr,
                           /* AggregateArgs */ false,
                           /* BlockFrequencyInfo */ nullptr,
                           /* BranchProbabilityInfo */ nullptr,
@@ -455,7 +505,7 @@
   Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
   Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);
 
-  LLVM_DEBUG(dbgs() << "Before privatization: " << *UI->getFunction() << "\n");
+  LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n");
 
   FunctionCallee TIDRTLFn =
       getOrCreateRuntimeFunction(OMPRTL___kmpc_global_thread_num);
@@ -496,56 +546,12 @@
     PrivHelper(*Output);
   }
 
-  LLVM_DEBUG(dbgs() << "After  privatization: " << *UI->getFunction() << "\n");
+  LLVM_DEBUG(dbgs() << "After  privatization: " << *OuterFn << "\n");
   LLVM_DEBUG({
-    for (auto *BB : ParallelRegionBlocks)
+    for (auto *BB : OI.Blocks)
       dbgs() << " PBR: " << BB->getName() << "\n";
   });
 
-  // Add some known attributes to the outlined function.
-  Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
-  OutlinedFn->addParamAttr(0, Attribute::NoAlias);
-  OutlinedFn->addParamAttr(1, Attribute::NoAlias);
-  OutlinedFn->addFnAttr(Attribute::NoUnwind);
-  OutlinedFn->addFnAttr(Attribute::NoRecurse);
-
-  LLVM_DEBUG(dbgs() << "After      outlining: " << *UI->getFunction() << "\n");
-  LLVM_DEBUG(dbgs() << "   Outlined function: " << *OutlinedFn << "\n");
-
-  // For compability with the clang CG we move the outlined function after the
-  // one with the parallel region.
-  OutlinedFn->removeFromParent();
-  M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);
-
-  // Remove the artificial entry introduced by the extractor right away, we
-  // made our own entry block after all.
-  {
-    BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
-    assert(ArtificialEntry.getUniqueSuccessor() == PRegEntryBB);
-    assert(PRegEntryBB->getUniquePredecessor() == &ArtificialEntry);
-    PRegEntryBB->moveBefore(&ArtificialEntry);
-    ArtificialEntry.eraseFromParent();
-  }
-  LLVM_DEBUG(dbgs() << "PP Outlined function: " << *OutlinedFn << "\n");
-  assert(&OutlinedFn->getEntryBlock() == PRegEntryBB);
-
-  assert(OutlinedFn && OutlinedFn->getNumUses() == 1);
-  assert(OutlinedFn->arg_size() >= 2 &&
-         "Expected at least tid and bounded tid as arguments");
-  unsigned NumCapturedVars = OutlinedFn->arg_size() - /* tid & bounded tid */ 2;
-
-  CallInst *CI = cast<CallInst>(OutlinedFn->user_back());
-  CI->getParent()->setName("omp_parallel");
-  Builder.SetInsertPoint(CI);
-
-  // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
-  Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
-                           Builder.CreateBitCast(OutlinedFn, ParallelTaskPtr)};
-
-  SmallVector<Value *, 16> RealArgs;
-  RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
-  RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());
-
   FunctionCallee RTLFn = getOrCreateRuntimeFunction(OMPRTL___kmpc_fork_call);
   if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
     if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
@@ -558,59 +564,86 @@
       //    callback callee.
       F->addMetadata(
           llvm::LLVMContext::MD_callback,
-          *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
-                                      2, {-1, -1},
-                                      /* VarArgsArePassed */ true)}));
+          *llvm::MDNode::get(
+              Ctx, {MDB.createCallbackEncoding(2, {-1, -1},
+                                               /* VarArgsArePassed */ true)}));
     }
   }
 
-  Builder.CreateCall(RTLFn, RealArgs);
+  OI.PostOutlineCB = [=](Function &OutlinedFn) {
+    // Add some known attributes.
+    OutlinedFn.addParamAttr(0, Attribute::NoAlias);
+    OutlinedFn.addParamAttr(1, Attribute::NoAlias);
+    OutlinedFn.addFnAttr(Attribute::NoUnwind);
+    OutlinedFn.addFnAttr(Attribute::NoRecurse);
 
-  LLVM_DEBUG(dbgs() << "With fork_call placed: "
-                    << *Builder.GetInsertBlock()->getParent() << "\n");
+    assert(OutlinedFn.arg_size() >= 2 &&
+           "Expected at least tid and bounded tid as arguments");
+    unsigned NumCapturedVars =
+        OutlinedFn.arg_size() - /* tid & bounded tid */ 2;
 
-  InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
-  InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());
-  UI->eraseFromParent();
+    CallInst *CI = cast<CallInst>(OutlinedFn.user_back());
+    CI->getParent()->setName("omp_parallel");
+    Builder.SetInsertPoint(CI);
 
-  // Initialize the local TID stack location with the argument value.
-  Builder.SetInsertPoint(PrivTID);
-  Function::arg_iterator OutlinedAI = OutlinedFn->arg_begin();
-  Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr);
+    // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
+    Value *ForkCallArgs[] = {
+        Ident, Builder.getInt32(NumCapturedVars),
+        Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)};
 
-  // If no "if" clause was present we do not need the call created during
-  // outlining, otherwise we reuse it in the serialized parallel region.
-  if (!ElseTI) {
-    CI->eraseFromParent();
-  } else {
+    SmallVector<Value *, 16> RealArgs;
+    RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
+    RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());
 
-    // If an "if" clause was present we are now generating the serialized
-    // version into the "else" branch.
-    Builder.SetInsertPoint(ElseTI);
+    Builder.CreateCall(RTLFn, RealArgs);
 
-    // Build calls __kmpc_serialized_parallel(&Ident, GTid);
-    Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
-    Builder.CreateCall(
-        getOrCreateRuntimeFunction(OMPRTL___kmpc_serialized_parallel),
-        SerializedParallelCallArgs);
+    LLVM_DEBUG(dbgs() << "With fork_call placed: "
+                      << *Builder.GetInsertBlock()->getParent() << "\n");
 
-    // OutlinedFn(&GTid, &zero, CapturedStruct);
-    CI->removeFromParent();
-    Builder.Insert(CI);
+    InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());
 
-    // __kmpc_end_serialized_parallel(&Ident, GTid);
-    Value *EndArgs[] = {Ident, ThreadID};
-    Builder.CreateCall(
-        getOrCreateRuntimeFunction(OMPRTL___kmpc_end_serialized_parallel),
-        EndArgs);
+    // Initialize the local TID stack location with the argument value.
+    Builder.SetInsertPoint(PrivTID);
+    Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin();
+    Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr);
 
-    LLVM_DEBUG(dbgs() << "With serialized parallel region: "
-                      << *Builder.GetInsertBlock()->getParent() << "\n");
-  }
+    // If no "if" clause was present we do not need the call created during
+    // outlining, otherwise we reuse it in the serialized parallel region.
+    if (!ElseTI) {
+      CI->eraseFromParent();
+    } else {
+
+      // If an "if" clause was present we are now generating the serialized
+      // version into the "else" branch.
+      Builder.SetInsertPoint(ElseTI);
+
+      // Build calls __kmpc_serialized_parallel(&Ident, GTid);
+      Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
+      Builder.CreateCall(
+          getOrCreateRuntimeFunction(OMPRTL___kmpc_serialized_parallel),
+          SerializedParallelCallArgs);
+
+      // OutlinedFn(&GTid, &zero, CapturedStruct);
+      CI->removeFromParent();
+      Builder.Insert(CI);
+
+      // __kmpc_end_serialized_parallel(&Ident, GTid);
+      Value *EndArgs[] = {Ident, ThreadID};
+      Builder.CreateCall(
+          getOrCreateRuntimeFunction(OMPRTL___kmpc_end_serialized_parallel),
+          EndArgs);
+
+      LLVM_DEBUG(dbgs() << "With serialized parallel region: "
+                        << *Builder.GetInsertBlock()->getParent() << "\n");
+    }
+
+    for (Instruction *I : ToBeDeleted)
+      I->eraseFromParent();
+  };
 
   // Adjust the finalization stack, verify the adjustment, and call the
-  // finalize function a last time to finalize values between the pre-fini block
-  // and the exit block if we left the parallel "the normal way".
+  // finalize function a last time to finalize values between the pre-fini
+  // block and the exit block if we left the parallel "the normal way".
   auto FiniInfo = FinalizationStack.pop_back_val();
   (void)FiniInfo;
   assert(FiniInfo.DK == OMPD_parallel &&
@@ -618,15 +651,17 @@
 
   Instruction *PreFiniTI = PRegPreFiniBB->getTerminator();
   assert(PreFiniTI->getNumSuccessors() == 1 &&
-         PreFiniTI->getSuccessor(0)->size() == 1 &&
-         isa<ReturnInst>(PreFiniTI->getSuccessor(0)->getTerminator()) &&
+         PreFiniTI->getSuccessor(0) == PRegExitBB &&
          "Unexpected CFG structure!");
 
   InsertPointTy PreFiniIP(PRegPreFiniBB, PreFiniTI->getIterator());
   FiniCB(PreFiniIP);
 
-  for (Instruction *I : ToBeDeleted)
-    I->eraseFromParent();
+  InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
+  UI->eraseFromParent();
+
+  // Register the outlined info.
+  addOutlineInfo(std::move(OI));
 
   return AfterIP;
 }
Index: llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
===================================================================
--- llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -34,6 +34,9 @@
   /// before any other method and only once!
   void initialize();
 
+  /// Finalize the underlying module, e.g., by outlining regions.
+  void finalize();
+
   /// Add attributes known for \p FnID to \p Fn.
   void addAttributes(omp::RuntimeFunction FnID, Function &Fn);
 
@@ -254,6 +257,20 @@
 
   /// Map to remember existing ident_t*.
   DenseMap<std::pair<Constant *, uint64_t>, GlobalVariable *> IdentMap;
+
+  /// Helper that contains information about regions we need to outline
+  /// during finalization.
+  struct OutlineInfo {
+    SmallVector<BasicBlock *, 32> Blocks;
+    using PostOutlineCBTy = std::function<void(Function &)>;
+    PostOutlineCBTy PostOutlineCB;
+  };
+
+  /// Collection of regions that need to be outlined during finalization.
+  SmallVector<OutlineInfo, 16> OutlineInfos;
+
+  /// Add a new region that will be outlined later.
+  void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); }
 };
 
 } // end namespace llvm
Index: clang/lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.cpp
+++ clang/lib/CodeGen/CodeGenFunction.cpp
@@ -32,6 +32,7 @@
 #include "clang/Basic/TargetInfo.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
 #include "clang/Frontend/FrontendDiagnostic.h"
+#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/FPEnv.h"
@@ -104,6 +105,14 @@
 
   if (getLangOpts().OpenMP && CurFn)
     CGM.getOpenMPRuntime().functionFinished(*this);
+
+  // If we have an OpenMPIRBuilder we want to finalize functions (incl.
+  // outlining etc) at some point. Doing it once the function codegen is done
+  // seems to be a reasonable spot. We do it here, as opposed to the deletion
+  // time of the CodeGenModule, because we have to ensure the IR has not yet
+  // been "emitted" to the outside, thus, modifications are still sensible.
+  if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder())
+    OMPBuilder->finalize();
 }
 
 // Map the LangOption for rounding mode into
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to