This revision was automatically updated to reflect the committed changes. Closed by commit rG229c78d3a5d1: [CUDA][HIP] Fix host/device check with -fopenmp (authored by yaxunl).
Changed prior to commit: https://reviews.llvm.org/D67837?vs=224126&id=224208#toc Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D67837/new/ https://reviews.llvm.org/D67837 Files: clang/include/clang/Sema/Sema.h clang/lib/Sema/SemaCUDA.cpp clang/lib/Sema/SemaDecl.cpp clang/lib/Sema/SemaOpenMP.cpp clang/test/CodeGenCUDA/openmp-target.cu clang/test/OpenMP/declare_target_messages.cpp clang/test/SemaCUDA/call-device-fn-from-host.cu clang/test/SemaCUDA/host-device-constexpr.cu clang/test/SemaCUDA/openmp-static-func.cu clang/test/SemaCUDA/openmp-target.cu
Index: clang/test/SemaCUDA/openmp-target.cu =================================================================== --- /dev/null +++ clang/test/SemaCUDA/openmp-target.cu @@ -0,0 +1,43 @@ +// RUN: %clang_cc1 -triple x86_64 -verify=expected,dev \ +// RUN: -verify-ignore-unexpected=note \ +// RUN: -fopenmp -fopenmp-version=50 -o - %s +// RUN: %clang_cc1 -triple x86_64 -verify -verify-ignore-unexpected=note\ +// RUN: -fopenmp -fopenmp-version=50 -o - -x c++ %s +// RUN: %clang_cc1 -triple x86_64 -verify=dev -verify-ignore-unexpected=note\ +// RUN: -fcuda-is-device -o - %s + +#if __CUDA__ +#include "Inputs/cuda.h" +__device__ void cu_devf(); +#endif + +void bazz() {} +#pragma omp declare target to(bazz) device_type(nohost) +void bazzz() {bazz();} +#pragma omp declare target to(bazzz) device_type(nohost) +void any() {bazz();} // expected-error {{function with 'device_type(nohost)' is not available on host}} +void host1() {bazz();} +#pragma omp declare target to(host1) device_type(host) +void host2() {bazz();} +#pragma omp declare target to(host2) +void device() {host1();} +#pragma omp declare target to(device) device_type(nohost) +void host3() {host1();} +#pragma omp declare target to(host3) + +#pragma omp declare target +void any1() {any();} +void any2() {host1();} +void any3() {device();} // expected-error {{function with 'device_type(nohost)' is not available on host}} +void any4() {any2();} +#pragma omp end declare target + +void any5() {any();} +void any6() {host1();} +void any7() {device();} // expected-error {{function with 'device_type(nohost)' is not available on host}} +void any8() {any2();} + +#if __CUDA__ +void cu_hostf() { cu_devf(); } // dev-error {{no matching function for call to 'cu_devf'}} +__device__ void cu_devf2() { cu_hostf(); } // dev-error{{no matching function for call to 'cu_hostf'}} +#endif Index: clang/test/SemaCUDA/openmp-static-func.cu =================================================================== --- /dev/null +++ clang/test/SemaCUDA/openmp-static-func.cu @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only \ +// RUN: -verify -fopenmp %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only \ +// RUN: -verify -fopenmp -x hip %s +// expected-no-diagnostics + +// Tests there is no assertion in Sema::markKnownEmitted when fopenmp is used +// with CUDA/HIP host compilation. + +static void f() {} + +static void g() { f(); } + +static void h() { g(); } Index: clang/test/SemaCUDA/host-device-constexpr.cu =================================================================== --- clang/test/SemaCUDA/host-device-constexpr.cu +++ clang/test/SemaCUDA/host-device-constexpr.cu @@ -1,5 +1,10 @@ // RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -isystem %S/Inputs %s -// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -isystem %S/Inputs %s -fcuda-is-device +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -isystem %S/Inputs %s \ +// RUN: -fcuda-is-device +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -isystem %S/Inputs \ +// RUN: -fopenmp %s +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -isystem %S/Inputs \ +// RUN: -fopenmp %s -fcuda-is-device #include "Inputs/cuda.h" Index: clang/test/SemaCUDA/call-device-fn-from-host.cu =================================================================== --- clang/test/SemaCUDA/call-device-fn-from-host.cu +++ clang/test/SemaCUDA/call-device-fn-from-host.cu @@ -1,5 +1,7 @@ // RUN: %clang_cc1 %s --std=c++11 -triple x86_64-unknown-linux -emit-llvm -o - \ // RUN: -verify -verify-ignore-unexpected=note +// RUN: %clang_cc1 %s --std=c++11 -triple x86_64-unknown-linux -emit-llvm -o - \ +// RUN: -verify -verify-ignore-unexpected=note -fopenmp // Note: This test won't work with -fsyntax-only, because some of these errors // are emitted during codegen. Index: clang/test/OpenMP/declare_target_messages.cpp =================================================================== --- clang/test/OpenMP/declare_target_messages.cpp +++ clang/test/OpenMP/declare_target_messages.cpp @@ -162,10 +162,10 @@ #pragma omp declare target link(x) // expected-error {{'x' must not appear in both clauses 'to' and 'link'}} void bazz() {} -#pragma omp declare target to(bazz) device_type(nohost) // omp45-error {{unexpected 'device_type' clause, only 'to' or 'link' clauses expected}} +#pragma omp declare target to(bazz) device_type(nohost) // omp45-error {{unexpected 'device_type' clause, only 'to' or 'link' clauses expected}} host5-note {{marked as 'device_type(nohost)' here}} void bazzz() {bazz();} #pragma omp declare target to(bazzz) device_type(nohost) // omp45-error {{unexpected 'device_type' clause, only 'to' or 'link' clauses expected}} -void any() {bazz();} +void any() {bazz();} // host5-error {{function with 'device_type(nohost)' is not available on host}} void host1() {bazz();} #pragma omp declare target to(host1) device_type(host) // omp45-error {{unexpected 'device_type' clause, only 'to' or 'link' clauses expected}} dev5-note 2 {{marked as 'device_type(host)' here}} void host2() {bazz();} Index: clang/test/CodeGenCUDA/openmp-target.cu =================================================================== --- /dev/null +++ clang/test/CodeGenCUDA/openmp-target.cu @@ -0,0 +1,20 @@ +// REQUIRES: x86-registered-target +// REQUIRES: nvptx-registered-target + +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm \ +// RUN: -fopenmp -fopenmp-version=50 -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm \ +// RUN: -fopenmp -fopenmp-version=50 -o - -x c++ %s | FileCheck %s +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device \ +// RUN: -emit-llvm -o - %s | FileCheck -check-prefixes=DEV %s + +// CHECK: declare{{.*}}@_Z7nohost1v() +// DEV-NOT: _Z7nohost1v +void nohost1() {} +#pragma omp declare target to(nohost1) device_type(nohost) + +// CHECK: declare{{.*}}@_Z7nohost2v() +// DEV-NOT: _Z7nohost2v +void nohost2() {nohost1();} +#pragma omp declare target to(nohost2) device_type(nohost) + Index: clang/lib/Sema/SemaOpenMP.cpp =================================================================== --- clang/lib/Sema/SemaOpenMP.cpp +++ clang/lib/Sema/SemaOpenMP.cpp @@ -1565,34 +1565,11 @@ }; } // anonymous namespace -/// Do we know that we will eventually codegen the given function? -static FunctionEmissionStatus isKnownDeviceEmitted(Sema &S, FunctionDecl *FD) { - assert(S.LangOpts.OpenMP && S.LangOpts.OpenMPIsDevice && - "Expected OpenMP device compilation."); - // Templates are emitted when they're instantiated. - if (FD->isDependentContext()) - return FunctionEmissionStatus::Discarded; - - Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = - OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl()); - if (DevTy.hasValue()) - return (*DevTy == OMPDeclareTargetDeclAttr::DT_Host) - ? FunctionEmissionStatus::Discarded - : FunctionEmissionStatus::Emitted; - - // Otherwise, the function is known-emitted if it's in our set of - // known-emitted functions. - return (S.DeviceKnownEmittedFns.count(FD) > 0) - ? FunctionEmissionStatus::Emitted - : FunctionEmissionStatus::Unknown; -} - Sema::DeviceDiagBuilder Sema::diagIfOpenMPDeviceCode(SourceLocation Loc, unsigned DiagID) { assert(LangOpts.OpenMP && LangOpts.OpenMPIsDevice && "Expected OpenMP device compilation."); - FunctionEmissionStatus FES = - isKnownDeviceEmitted(*this, getCurFunctionDecl()); + FunctionEmissionStatus FES = getEmissionStatus(getCurFunctionDecl()); DeviceDiagBuilder::Kind Kind = DeviceDiagBuilder::K_Nop; switch (FES) { case FunctionEmissionStatus::Emitted: @@ -1602,42 +1579,23 @@ Kind = isOpenMPDeviceDelayedContext(*this) ? DeviceDiagBuilder::K_Deferred : DeviceDiagBuilder::K_Immediate; break; - case FunctionEmissionStatus::Discarded: + case FunctionEmissionStatus::TemplateDiscarded: + case FunctionEmissionStatus::OMPDiscarded: Kind = DeviceDiagBuilder::K_Nop; break; + case FunctionEmissionStatus::CUDADiscarded: + llvm_unreachable("CUDADiscarded unexpected in OpenMP device compilation"); + break; } return DeviceDiagBuilder(Kind, Loc, DiagID, getCurFunctionDecl(), *this); } -/// Do we know that we will eventually codegen the given function? -static FunctionEmissionStatus isKnownHostEmitted(Sema &S, FunctionDecl *FD) { - assert(S.LangOpts.OpenMP && !S.LangOpts.OpenMPIsDevice && - "Expected OpenMP host compilation."); - // In OpenMP 4.5 all the functions are host functions. - if (S.LangOpts.OpenMP <= 45) - return FunctionEmissionStatus::Emitted; - - Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = - OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl()); - if (DevTy.hasValue()) - return (*DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) - ? FunctionEmissionStatus::Discarded - : FunctionEmissionStatus::Emitted; - - // Otherwise, the function is known-emitted if it's in our set of - // known-emitted functions. - return (S.DeviceKnownEmittedFns.count(FD) > 0) - ? FunctionEmissionStatus::Emitted - : FunctionEmissionStatus::Unknown; -} - Sema::DeviceDiagBuilder Sema::diagIfOpenMPHostCode(SourceLocation Loc, unsigned DiagID) { assert(LangOpts.OpenMP && !LangOpts.OpenMPIsDevice && "Expected OpenMP host compilation."); - FunctionEmissionStatus FES = - isKnownHostEmitted(*this, getCurFunctionDecl()); + FunctionEmissionStatus FES = getEmissionStatus(getCurFunctionDecl()); DeviceDiagBuilder::Kind Kind = DeviceDiagBuilder::K_Nop; switch (FES) { case FunctionEmissionStatus::Emitted: @@ -1646,7 +1604,9 @@ case FunctionEmissionStatus::Unknown: Kind = DeviceDiagBuilder::K_Deferred; break; - case FunctionEmissionStatus::Discarded: + case FunctionEmissionStatus::TemplateDiscarded: + case FunctionEmissionStatus::OMPDiscarded: + case FunctionEmissionStatus::CUDADiscarded: Kind = DeviceDiagBuilder::K_Nop; break; } @@ -1663,31 +1623,34 @@ FunctionDecl *Caller = getCurFunctionDecl(); // host only function are not available on the device. - if (Caller && - (isKnownDeviceEmitted(*this, Caller) == FunctionEmissionStatus::Emitted || - (!isOpenMPDeviceDelayedContext(*this) && - isKnownDeviceEmitted(*this, Caller) == - FunctionEmissionStatus::Unknown)) && - isKnownDeviceEmitted(*this, Callee) == - FunctionEmissionStatus::Discarded) { - StringRef HostDevTy = - getOpenMPSimpleClauseTypeName(OMPC_device_type, OMPC_DEVICE_TYPE_host); - Diag(Loc, diag::err_omp_wrong_device_function_call) << HostDevTy << 0; - Diag(Callee->getAttr<OMPDeclareTargetDeclAttr>()->getLocation(), - diag::note_omp_marked_device_type_here) - << HostDevTy; - return; + if (Caller) { + FunctionEmissionStatus CallerS = getEmissionStatus(Caller); + FunctionEmissionStatus CalleeS = getEmissionStatus(Callee); + assert(CallerS != FunctionEmissionStatus::CUDADiscarded && + CalleeS != FunctionEmissionStatus::CUDADiscarded && + "CUDADiscarded unexpected in OpenMP device function check"); + if ((CallerS == FunctionEmissionStatus::Emitted || + (!isOpenMPDeviceDelayedContext(*this) && + CallerS == FunctionEmissionStatus::Unknown)) && + CalleeS == FunctionEmissionStatus::OMPDiscarded) { + StringRef HostDevTy = getOpenMPSimpleClauseTypeName( + OMPC_device_type, OMPC_DEVICE_TYPE_host); + Diag(Loc, diag::err_omp_wrong_device_function_call) << HostDevTy << 0; + Diag(Callee->getAttr<OMPDeclareTargetDeclAttr>()->getLocation(), + diag::note_omp_marked_device_type_here) + << HostDevTy; + return; + } } // If the caller is known-emitted, mark the callee as known-emitted. // Otherwise, mark the call in our call graph so we can traverse it later. if ((CheckForDelayedContext && !isOpenMPDeviceDelayedContext(*this)) || (!Caller && !CheckForDelayedContext) || - (Caller && - isKnownDeviceEmitted(*this, Caller) == FunctionEmissionStatus::Emitted)) + (Caller && getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted)) markKnownEmitted(*this, Caller, Callee, Loc, [CheckForDelayedContext](Sema &S, FunctionDecl *FD) { return CheckForDelayedContext && - isKnownDeviceEmitted(S, FD) == + S.getEmissionStatus(FD) == FunctionEmissionStatus::Emitted; }); else if (Caller) @@ -1703,29 +1666,38 @@ FunctionDecl *Caller = getCurFunctionDecl(); // device only function are not available on the host. - if (Caller && - isKnownHostEmitted(*this, Caller) == FunctionEmissionStatus::Emitted && - isKnownHostEmitted(*this, Callee) == FunctionEmissionStatus::Discarded) { - StringRef NoHostDevTy = getOpenMPSimpleClauseTypeName( - OMPC_device_type, OMPC_DEVICE_TYPE_nohost); - Diag(Loc, diag::err_omp_wrong_device_function_call) << NoHostDevTy << 1; - Diag(Callee->getAttr<OMPDeclareTargetDeclAttr>()->getLocation(), - diag::note_omp_marked_device_type_here) - << NoHostDevTy; - return; + if (Caller) { + FunctionEmissionStatus CallerS = getEmissionStatus(Caller); + FunctionEmissionStatus CalleeS = getEmissionStatus(Callee); + assert( + (LangOpts.CUDA || (CallerS != FunctionEmissionStatus::CUDADiscarded && + CalleeS != FunctionEmissionStatus::CUDADiscarded)) && + "CUDADiscarded unexpected in OpenMP host function check"); + if (CallerS == FunctionEmissionStatus::Emitted && + CalleeS == FunctionEmissionStatus::OMPDiscarded) { + StringRef NoHostDevTy = getOpenMPSimpleClauseTypeName( + OMPC_device_type, OMPC_DEVICE_TYPE_nohost); + Diag(Loc, diag::err_omp_wrong_device_function_call) << NoHostDevTy << 1; + Diag(Callee->getAttr<OMPDeclareTargetDeclAttr>()->getLocation(), + diag::note_omp_marked_device_type_here) + << NoHostDevTy; + return; + } } // If the caller is known-emitted, mark the callee as known-emitted. // Otherwise, mark the call in our call graph so we can traverse it later. - if ((!CheckCaller && !Caller) || - (Caller && - isKnownHostEmitted(*this, Caller) == FunctionEmissionStatus::Emitted)) - markKnownEmitted( - *this, Caller, Callee, Loc, [CheckCaller](Sema &S, FunctionDecl *FD) { - return CheckCaller && - isKnownHostEmitted(S, FD) == FunctionEmissionStatus::Emitted; - }); - else if (Caller) - DeviceCallGraph[Caller].insert({Callee, Loc}); + if (!shouldIgnoreInHostDeviceCheck(Callee)) { + if ((!CheckCaller && !Caller) || + (Caller && + getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted)) + markKnownEmitted( + *this, Caller, Callee, Loc, [CheckCaller](Sema &S, FunctionDecl *FD) { + return CheckCaller && + S.getEmissionStatus(FD) == FunctionEmissionStatus::Emitted; + }); + else if (Caller) + DeviceCallGraph[Caller].insert({Callee, Loc}); + } } void Sema::checkOpenMPDeviceExpr(const Expr *E) { Index: clang/lib/Sema/SemaDecl.cpp =================================================================== --- clang/lib/Sema/SemaDecl.cpp +++ clang/lib/Sema/SemaDecl.cpp @@ -17614,3 +17614,87 @@ Decl *Sema::getObjCDeclContext() const { return (dyn_cast_or_null<ObjCContainerDecl>(CurContext)); } + +Sema::FunctionEmissionStatus Sema::getEmissionStatus(FunctionDecl *FD) { + // Templates are emitted when they're instantiated. + if (FD->isDependentContext()) + return FunctionEmissionStatus::TemplateDiscarded; + + FunctionEmissionStatus OMPES = FunctionEmissionStatus::Unknown; + if (LangOpts.OpenMPIsDevice) { + Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = + OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl()); + if (DevTy.hasValue()) { + if (*DevTy == OMPDeclareTargetDeclAttr::DT_Host) + OMPES = FunctionEmissionStatus::OMPDiscarded; + else if (DeviceKnownEmittedFns.count(FD) > 0) + OMPES = FunctionEmissionStatus::Emitted; + } + } else if (LangOpts.OpenMP) { + // In OpenMP 4.5 all the functions are host functions. + if (LangOpts.OpenMP <= 45) { + OMPES = FunctionEmissionStatus::Emitted; + } else { + Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = + OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl()); + // In OpenMP 5.0 or above, DevTy may be changed later by + // #pragma omp declare target to(*) device_type(*). Therefore DevTy + // having no value does not imply host. The emission status will be + // checked again at the end of compilation unit. + if (DevTy.hasValue()) { + if (*DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) { + OMPES = FunctionEmissionStatus::OMPDiscarded; + } else if (DeviceKnownEmittedFns.count(FD) > 0) { + OMPES = FunctionEmissionStatus::Emitted; + } + } + } + } + if (OMPES == FunctionEmissionStatus::OMPDiscarded || + (OMPES == FunctionEmissionStatus::Emitted && !LangOpts.CUDA)) + return OMPES; + + if (LangOpts.CUDA) { + // When compiling for device, host functions are never emitted. Similarly, + // when compiling for host, device and global functions are never emitted. + // (Technically, we do emit a host-side stub for global functions, but this + // doesn't count for our purposes here.) + Sema::CUDAFunctionTarget T = IdentifyCUDATarget(FD); + if (LangOpts.CUDAIsDevice && T == Sema::CFT_Host) + return FunctionEmissionStatus::CUDADiscarded; + if (!LangOpts.CUDAIsDevice && + (T == Sema::CFT_Device || T == Sema::CFT_Global)) + return FunctionEmissionStatus::CUDADiscarded; + + // Check whether this function is externally visible -- if so, it's + // known-emitted. + // + // We have to check the GVA linkage of the function's *definition* -- if we + // only have a declaration, we don't know whether or not the function will + // be emitted, because (say) the definition could include "inline". + FunctionDecl *Def = FD->getDefinition(); + + if (Def && + !isDiscardableGVALinkage(getASTContext().GetGVALinkageForFunction(Def)) + && (!LangOpts.OpenMP || OMPES == FunctionEmissionStatus::Emitted)) + return FunctionEmissionStatus::Emitted; + } + + // Otherwise, the function is known-emitted if it's in our set of + // known-emitted functions. + return (DeviceKnownEmittedFns.count(FD) > 0) + ? FunctionEmissionStatus::Emitted + : FunctionEmissionStatus::Unknown; +} + +bool Sema::shouldIgnoreInHostDeviceCheck(FunctionDecl *Callee) { + // Host-side references to a __global__ function refer to the stub, so the + // function itself is never emitted and therefore should not be marked. + // If we have host fn calls kernel fn calls host+device, the HD function + // does not get instantiated on the host. We model this by omitting at the + // call to the kernel from the callgraph. This ensures that, when compiling + // for host, only HD functions actually called from the host get marked as + // known-emitted. + return LangOpts.CUDA && !LangOpts.CUDAIsDevice && + IdentifyCUDATarget(Callee) == CFT_Global; +} Index: clang/lib/Sema/SemaCUDA.cpp =================================================================== --- clang/lib/Sema/SemaCUDA.cpp +++ clang/lib/Sema/SemaCUDA.cpp @@ -600,40 +600,6 @@ NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); } -// Do we know that we will eventually codegen the given function? -static bool IsKnownEmitted(Sema &S, FunctionDecl *FD) { - // Templates are emitted when they're instantiated. - if (FD->isDependentContext()) - return false; - - // When compiling for device, host functions are never emitted. Similarly, - // when compiling for host, device and global functions are never emitted. - // (Technically, we do emit a host-side stub for global functions, but this - // doesn't count for our purposes here.) - Sema::CUDAFunctionTarget T = S.IdentifyCUDATarget(FD); - if (S.getLangOpts().CUDAIsDevice && T == Sema::CFT_Host) - return false; - if (!S.getLangOpts().CUDAIsDevice && - (T == Sema::CFT_Device || T == Sema::CFT_Global)) - return false; - - // Check whether this function is externally visible -- if so, it's - // known-emitted. - // - // We have to check the GVA linkage of the function's *definition* -- if we - // only have a declaration, we don't know whether or not the function will be - // emitted, because (say) the definition could include "inline". - FunctionDecl *Def = FD->getDefinition(); - - if (Def && - !isDiscardableGVALinkage(S.getASTContext().GetGVALinkageForFunction(Def))) - return true; - - // Otherwise, the function is known-emitted if it's in our set of - // known-emitted functions. - return S.DeviceKnownEmittedFns.count(FD) > 0; -} - Sema::DeviceDiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc, unsigned DiagID) { assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); @@ -647,7 +613,8 @@ // device code if we're compiling for device. Defer any errors in device // mode until the function is known-emitted. if (getLangOpts().CUDAIsDevice) { - return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext)) + return (getEmissionStatus(cast<FunctionDecl>(CurContext)) == + FunctionEmissionStatus::Emitted) ? DeviceDiagBuilder::K_ImmediateWithCallStack : DeviceDiagBuilder::K_Deferred; } @@ -675,7 +642,8 @@ if (getLangOpts().CUDAIsDevice) return DeviceDiagBuilder::K_Nop; - return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext)) + return (getEmissionStatus(cast<FunctionDecl>(CurContext)) == + FunctionEmissionStatus::Emitted) ? DeviceDiagBuilder::K_ImmediateWithCallStack : DeviceDiagBuilder::K_Deferred; default: @@ -702,12 +670,16 @@ // If the caller is known-emitted, mark the callee as known-emitted. // Otherwise, mark the call in our call graph so we can traverse it later. - bool CallerKnownEmitted = IsKnownEmitted(*this, Caller); + bool CallerKnownEmitted = + getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted; if (CallerKnownEmitted) { // Host-side references to a __global__ function refer to the stub, so the // function itself is never emitted and therefore should not be marked. - if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global) - markKnownEmitted(*this, Caller, Callee, Loc, IsKnownEmitted); + if (!shouldIgnoreInHostDeviceCheck(Callee)) + markKnownEmitted( + *this, Caller, Callee, Loc, [](Sema &S, FunctionDecl *FD) { + return S.getEmissionStatus(FD) == FunctionEmissionStatus::Emitted; + }); } else { // If we have // host fn calls kernel fn calls host+device, @@ -715,7 +687,7 @@ // omitting at the call to the kernel from the callgraph. This ensures // that, when compiling for host, only HD functions actually called from the // host get marked as known-emitted. - if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global) + if (!shouldIgnoreInHostDeviceCheck(Callee)) DeviceCallGraph[Caller].insert({Callee, Loc}); } Index: clang/include/clang/Sema/Sema.h =================================================================== --- clang/include/clang/Sema/Sema.h +++ clang/include/clang/Sema/Sema.h @@ -3451,6 +3451,19 @@ bool DiagnoseMissing); bool isKnownName(StringRef name); + /// Status of the function emission on the CUDA/HIP/OpenMP host/device attrs. + enum class FunctionEmissionStatus { + Emitted, + CUDADiscarded, // Discarded due to CUDA/HIP hostness + OMPDiscarded, // Discarded due to OpenMP hostness + TemplateDiscarded, // Discarded due to uninstantiated templates + Unknown, + }; + FunctionEmissionStatus getEmissionStatus(FunctionDecl *Decl); + + // Whether the callee should be ignored in CUDA/HIP/OpenMP host/device check. + bool shouldIgnoreInHostDeviceCheck(FunctionDecl *Callee); + void ArgumentDependentLookup(DeclarationName Name, SourceLocation Loc, ArrayRef<Expr *> Args, ADLResult &Functions);
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits