AlexVlx updated this revision to Diff 544954.
AlexVlx removed a reviewer: eli.friedman.
AlexVlx added a comment.

This adds more ecumenical handling of unsupported builtins, as per the review 
discussion (a suffixed equivalent stub is emitted instead); it's paired with an 
associated change in accelerator code selection pass, where the actual check 
for these stubs occurs. I've also adjusted where the latter pass gets added to 
the `opt` pipeline, for the AMDGCN target; for the latter it's better, for the 
moment, to run it later because we essentially do LTCG, and therefore can 
unambiguously determine reachability by operating on the full module.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155850/new/

https://reviews.llvm.org/D155850

Files:
  clang/lib/CodeGen/BackendUtil.cpp
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/test/CodeGenStdPar/unannotated-functions-get-emitted.cpp
  clang/test/CodeGenStdPar/unsupported-builtins.cpp

Index: clang/test/CodeGenStdPar/unsupported-builtins.cpp
===================================================================
--- /dev/null
+++ clang/test/CodeGenStdPar/unsupported-builtins.cpp
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu \
+// RUN:   --stdpar -x hip -emit-llvm -fcuda-is-device -o - %s | FileCheck %s
+
+#define __global__ __attribute__((global))
+
+__global__ void foo() { return __builtin_ia32_pause(); }
+
+// CHECK: declare void @__builtin_ia32_pause__stdpar_unsupported()
Index: clang/test/CodeGenStdPar/unannotated-functions-get-emitted.cpp
===================================================================
--- /dev/null
+++ clang/test/CodeGenStdPar/unannotated-functions-get-emitted.cpp
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -x hip -emit-llvm -fcuda-is-device \
+// RUN:   -o - %s | FileCheck --check-prefix=NO-STDPAR-DEV %s
+
+// RUN: %clang_cc1 --stdpar -emit-llvm -fcuda-is-device \
+// RUN:   -o - %s | FileCheck --check-prefix=STDPAR-DEV %s
+
+#define __device__ __attribute__((device))
+
+// NO-STDPAR-DEV-NOT: define {{.*}} void @_Z3fooPff({{.*}})
+// STDPAR-DEV: define {{.*}} void @_Z3fooPff({{.*}})
+void foo(float *a, float b) {
+  *a = b;
+}
+
+// NO-STDPAR-DEV: define {{.*}} void @_Z3barPff({{.*}})
+// STDPAR-DEV: define {{.*}} void @_Z3barPff({{.*}})
+__device__ void bar(float *a, float b) {
+  *a = b;
+}
Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -3545,7 +3545,10 @@
           !Global->hasAttr<CUDAConstantAttr>() &&
           !Global->hasAttr<CUDASharedAttr>() &&
           !Global->getType()->isCUDADeviceBuiltinSurfaceType() &&
-          !Global->getType()->isCUDADeviceBuiltinTextureType())
+          !Global->getType()->isCUDADeviceBuiltinTextureType() &&
+          !(LangOpts.HIPStdPar &&
+            isa<FunctionDecl>(Global) &&
+            !Global->hasAttr<CUDAHostAttr>()))
         return;
     } else {
       // We need to emit host-side 'shadows' for all global
@@ -5307,7 +5310,9 @@
 
   setNonAliasAttributes(D, GV);
 
-  if (D->getTLSKind() && !GV->isThreadLocal()) {
+  if (D->getTLSKind() &&
+      !GV->isThreadLocal() &&
+      !(getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice)) {
     if (D->getTLSKind() == VarDecl::TLS_Dynamic)
       CXXThreadLocals.push_back(D);
     setTLSMode(GV, *D);
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -2251,6 +2251,19 @@
   return nullptr;
 }
 
+static RValue EmitStdParUnsupportedBuiltin(CodeGenFunction *CGF,
+                                           const FunctionDecl *FD) {
+  auto Name = FD->getNameAsString() + "__stdpar_unsupported";
+  auto FnTy = CGF->CGM.getTypes().GetFunctionType(FD);
+  auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy);
+
+  SmallVector<Value*, 16> Args;
+  for (auto &&FormalTy : FnTy->params())
+    Args.push_back(llvm::PoisonValue::get(FormalTy));
+
+  return RValue::get(CGF->Builder.CreateCall(UBF, Args));
+}
+
 RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
                                         const CallExpr *E,
                                         ReturnValueSlot ReturnValue) {
@@ -5541,7 +5554,10 @@
     llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
   }
 
-  ErrorUnsupported(E, "builtin function");
+  if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice)
+    return EmitStdParUnsupportedBuiltin(this, FD);
+  else
+    ErrorUnsupported(E, "builtin function");
 
   // Unknown builtin, for now just dump it out and return undef.
   return GetUndefRValue(E->getType());
Index: clang/lib/CodeGen/BackendUtil.cpp
===================================================================
--- clang/lib/CodeGen/BackendUtil.cpp
+++ clang/lib/CodeGen/BackendUtil.cpp
@@ -77,6 +77,7 @@
 #include "llvm/Transforms/Scalar/EarlyCSE.h"
 #include "llvm/Transforms/Scalar/GVN.h"
 #include "llvm/Transforms/Scalar/JumpThreading.h"
+#include "llvm/Transforms/StdPar/StdPar.h"
 #include "llvm/Transforms/Utils/Debugify.h"
 #include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -1093,6 +1094,16 @@
       TheModule->addModuleFlag(Module::Error, "UnifiedLTO", uint32_t(1));
   }
 
+  if (LangOpts.HIPStdPar) {
+    if (LangOpts.CUDAIsDevice) {
+      if (!TargetTriple.isAMDGCN())
+        MPM.addPass(StdParAcceleratorCodeSelectionPass());
+    }
+    else if (LangOpts.HIPStdParInterposeAlloc) {
+      MPM.addPass(StdParAllocationInterpositionPass());
+    }
+  }
+
   // Now that we have all of the passes ready, run them.
   {
     PrettyStackTraceString CrashInfo("Optimizer");
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to