================
@@ -424,6 +424,34 @@ void 
CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
       CGM.CreateRuntimeFunction(FTy, LaunchKernelName);
   CGF.EmitCall(FI, CGCallee::forDirect(cudaLaunchKernelFn), ReturnValueSlot(),
                LaunchKernelArgs);
+
+  // To prevent CUDA device stub functions from being merged by ICF in MSVC
+  // environment, create a unique global variable for each kernel and write to
+  // the variable in the device stub.
+  if (CGM.getContext().getTargetInfo().getCXXABI().isMicrosoft() &&
+      !CGF.getLangOpts().HIP) {
+    llvm::Function *KernelFunction = llvm::cast<llvm::Function>(Kernel);
+    if (KernelFunction->hasComdat()) {
+      std::string KernelName = KernelFunction->getName().str();
+      std::string GlobalVarName = KernelName + ".id";
+
+      llvm::GlobalVariable *HandleVar =
+          CGM.getModule().getNamedGlobal(GlobalVarName);
+      if (!HandleVar) {
+        HandleVar = new llvm::GlobalVariable(
+            CGM.getModule(), CGM.Int8Ty,
+            /*Constant=*/false, KernelFunction->getLinkage(),
+            llvm::ConstantInt::get(CGM.Int8Ty, 0), GlobalVarName);
+        HandleVar->setDSOLocal(KernelFunction->isDSOLocal());
+        HandleVar->setVisibility(KernelFunction->getVisibility());
+        HandleVar->setComdat(CGM.getModule().getOrInsertComdat(GlobalVarName));
+      }
+
+      CGF.Builder.CreateAlignedStore(llvm::ConstantInt::get(CGM.Int8Ty, 1),
----------------
rnk wrote:

LLVM knows how to optimize away a single write to an otherwise unused global, 
so I would mark this store volatile.

https://github.com/llvm/llvm-project/pull/90155
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to