AlexVlx updated this revision to Diff 544954. AlexVlx removed a reviewer: eli.friedman. AlexVlx added a comment.
This adds more ecumenical handling of unsupported builtins, as per the review discussion (a suffixed equivalent stub is emitted instead); it's paired with an associated change in accelerator code selection pass, where the actual check for these stubs occurs. I've also adjusted where the latter pass gets added to the `opt` pipeline, for the AMDGCN target; for the latter it's better, for the moment, to run it later because we essentially do LTCG, and therefore can unambiguously determine reachability by operating on the full module. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D155850/new/ https://reviews.llvm.org/D155850 Files: clang/lib/CodeGen/BackendUtil.cpp clang/lib/CodeGen/CGBuiltin.cpp clang/lib/CodeGen/CodeGenModule.cpp clang/test/CodeGenStdPar/unannotated-functions-get-emitted.cpp clang/test/CodeGenStdPar/unsupported-builtins.cpp
Index: clang/test/CodeGenStdPar/unsupported-builtins.cpp =================================================================== --- /dev/null +++ clang/test/CodeGenStdPar/unsupported-builtins.cpp @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu \ +// RUN: --stdpar -x hip -emit-llvm -fcuda-is-device -o - %s | FileCheck %s + +#define __global__ __attribute__((global)) + +__global__ void foo() { return __builtin_ia32_pause(); } + +// CHECK: declare void @__builtin_ia32_pause__stdpar_unsupported() Index: clang/test/CodeGenStdPar/unannotated-functions-get-emitted.cpp =================================================================== --- /dev/null +++ clang/test/CodeGenStdPar/unannotated-functions-get-emitted.cpp @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -x hip -emit-llvm -fcuda-is-device \ +// RUN: -o - %s | FileCheck --check-prefix=NO-STDPAR-DEV %s + +// RUN: %clang_cc1 --stdpar -emit-llvm -fcuda-is-device \ +// RUN: -o - %s | FileCheck --check-prefix=STDPAR-DEV %s + +#define __device__ __attribute__((device)) + +// NO-STDPAR-DEV-NOT: define {{.*}} void @_Z3fooPff({{.*}}) +// STDPAR-DEV: define {{.*}} void @_Z3fooPff({{.*}}) +void foo(float *a, float b) { + *a = b; +} + +// NO-STDPAR-DEV: define {{.*}} void @_Z3barPff({{.*}}) +// STDPAR-DEV: define {{.*}} void @_Z3barPff({{.*}}) +__device__ void bar(float *a, float b) { + *a = b; +} Index: clang/lib/CodeGen/CodeGenModule.cpp =================================================================== --- clang/lib/CodeGen/CodeGenModule.cpp +++ clang/lib/CodeGen/CodeGenModule.cpp @@ -3545,7 +3545,10 @@ !Global->hasAttr<CUDAConstantAttr>() && !Global->hasAttr<CUDASharedAttr>() && !Global->getType()->isCUDADeviceBuiltinSurfaceType() && - !Global->getType()->isCUDADeviceBuiltinTextureType()) + !Global->getType()->isCUDADeviceBuiltinTextureType() && + !(LangOpts.HIPStdPar && + isa<FunctionDecl>(Global) && + !Global->hasAttr<CUDAHostAttr>())) return; } else { // We need to emit host-side 'shadows' for all global @@ -5307,7 +5310,9 @@ setNonAliasAttributes(D, GV); - if (D->getTLSKind() && !GV->isThreadLocal()) { + if (D->getTLSKind() && + !GV->isThreadLocal() && + !(getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice)) { if (D->getTLSKind() == VarDecl::TLS_Dynamic) CXXThreadLocals.push_back(D); setTLSMode(GV, *D); Index: clang/lib/CodeGen/CGBuiltin.cpp =================================================================== --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -2251,6 +2251,19 @@ return nullptr; } +static RValue EmitStdParUnsupportedBuiltin(CodeGenFunction *CGF, + const FunctionDecl *FD) { + auto Name = FD->getNameAsString() + "__stdpar_unsupported"; + auto FnTy = CGF->CGM.getTypes().GetFunctionType(FD); + auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy); + + SmallVector<Value*, 16> Args; + for (auto &&FormalTy : FnTy->params()) + Args.push_back(llvm::PoisonValue::get(FormalTy)); + + return RValue::get(CGF->Builder.CreateCall(UBF, Args)); +} + RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue) { @@ -5541,7 +5554,10 @@ llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr"); } - ErrorUnsupported(E, "builtin function"); + if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice) + return EmitStdParUnsupportedBuiltin(this, FD); + else + ErrorUnsupported(E, "builtin function"); // Unknown builtin, for now just dump it out and return undef. return GetUndefRValue(E->getType()); Index: clang/lib/CodeGen/BackendUtil.cpp =================================================================== --- clang/lib/CodeGen/BackendUtil.cpp +++ clang/lib/CodeGen/BackendUtil.cpp @@ -77,6 +77,7 @@ #include "llvm/Transforms/Scalar/EarlyCSE.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Scalar/JumpThreading.h" +#include "llvm/Transforms/StdPar/StdPar.h" #include "llvm/Transforms/Utils/Debugify.h" #include "llvm/Transforms/Utils/EntryExitInstrumenter.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -1093,6 +1094,16 @@ TheModule->addModuleFlag(Module::Error, "UnifiedLTO", uint32_t(1)); } + if (LangOpts.HIPStdPar) { + if (LangOpts.CUDAIsDevice) { + if (!TargetTriple.isAMDGCN()) + MPM.addPass(StdParAcceleratorCodeSelectionPass()); + } + else if (LangOpts.HIPStdParInterposeAlloc) { + MPM.addPass(StdParAllocationInterpositionPass()); + } + } + // Now that we have all of the passes ready, run them. { PrettyStackTraceString CrashInfo("Optimizer");
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits