yaxunl updated this revision to Diff 223712.
yaxunl marked 14 inline comments as done.
yaxunl added a comment.

revised by Alexey's comments.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67837/new/

https://reviews.llvm.org/D67837

Files:
  include/clang/Sema/Sema.h
  lib/Sema/SemaCUDA.cpp
  lib/Sema/SemaDecl.cpp
  lib/Sema/SemaOpenMP.cpp
  test/CodeGenCUDA/openmp-target.cu
  test/OpenMP/declare_target_messages.cpp
  test/SemaCUDA/call-device-fn-from-host.cu
  test/SemaCUDA/host-device-constexpr.cu
  test/SemaCUDA/openmp-static-func.cu
  test/SemaCUDA/openmp-target.cu

Index: test/SemaCUDA/openmp-target.cu
===================================================================
--- /dev/null
+++ test/SemaCUDA/openmp-target.cu
@@ -0,0 +1,43 @@
+// RUN: %clang_cc1 -triple x86_64 -verify=expected,dev \
+// RUN:            -verify-ignore-unexpected=note \
+// RUN:            -fopenmp -fopenmp-version=50 -o - %s
+// RUN: %clang_cc1 -triple x86_64 -verify -verify-ignore-unexpected=note\
+// RUN:            -fopenmp -fopenmp-version=50 -o - -x c++ %s
+// RUN: %clang_cc1 -triple x86_64 -verify=dev -verify-ignore-unexpected=note\
+// RUN:            -fcuda-is-device -o - %s
+
+#if __CUDA__
+#include "Inputs/cuda.h"
+__device__ void cu_devf();
+#endif
+
+void bazz() {}
+#pragma omp declare target to(bazz) device_type(nohost)
+void bazzz() {bazz();}
+#pragma omp declare target to(bazzz) device_type(nohost)
+void any() {bazz();} // expected-error {{function with 'device_type(nohost)' is not available on host}}
+void host1() {bazz();}
+#pragma omp declare target to(host1) device_type(host)
+void host2() {bazz();}
+#pragma omp declare target to(host2)
+void device() {host1();}
+#pragma omp declare target to(device) device_type(nohost)
+void host3() {host1();}
+#pragma omp declare target to(host3)
+
+#pragma omp declare target
+void any1() {any();}
+void any2() {host1();}
+void any3() {device();} // expected-error {{function with 'device_type(nohost)' is not available on host}}
+void any4() {any2();}
+#pragma omp end declare target
+
+void any5() {any();}
+void any6() {host1();}
+void any7() {device();} // expected-error {{function with 'device_type(nohost)' is not available on host}}
+void any8() {any2();}
+
+#if __CUDA__
+void cu_hostf() { cu_devf(); } // dev-error {{no matching function for call to 'cu_devf'}}
+__device__ void cu_devf2() { cu_hostf(); } // dev-error{{no matching function for call to 'cu_hostf'}}
+#endif
Index: test/SemaCUDA/openmp-static-func.cu
===================================================================
--- /dev/null
+++ test/SemaCUDA/openmp-static-func.cu
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only \
+// RUN:            -verify -fopenmp %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only \
+// RUN:            -verify -fopenmp -x hip %s
+// expected-no-diagnostics
+
+// Tests there is no assertion in Sema::markKnownEmitted when fopenmp is used
+// with CUDA/HIP host compilation.
+
+static void f() {}
+
+static void g() { f(); }
+
+static void h() { g(); }
Index: test/SemaCUDA/host-device-constexpr.cu
===================================================================
--- test/SemaCUDA/host-device-constexpr.cu
+++ test/SemaCUDA/host-device-constexpr.cu
@@ -1,5 +1,10 @@
 // RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -isystem %S/Inputs %s
-// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -isystem %S/Inputs %s -fcuda-is-device
+// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -isystem %S/Inputs %s \
+// RUN:            -fcuda-is-device
+// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -isystem %S/Inputs \
+// RUN:            -fopenmp %s
+// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -isystem %S/Inputs \
+// RUN:            -fopenmp %s -fcuda-is-device
 
 #include "Inputs/cuda.h"
 
Index: test/SemaCUDA/call-device-fn-from-host.cu
===================================================================
--- test/SemaCUDA/call-device-fn-from-host.cu
+++ test/SemaCUDA/call-device-fn-from-host.cu
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 %s --std=c++11 -triple x86_64-unknown-linux -emit-llvm -o - \
 // RUN:   -verify -verify-ignore-unexpected=note
+// RUN: %clang_cc1 %s --std=c++11 -triple x86_64-unknown-linux -emit-llvm -o - \
+// RUN:   -verify -verify-ignore-unexpected=note -fopenmp
 
 // Note: This test won't work with -fsyntax-only, because some of these errors
 // are emitted during codegen.
Index: test/OpenMP/declare_target_messages.cpp
===================================================================
--- test/OpenMP/declare_target_messages.cpp
+++ test/OpenMP/declare_target_messages.cpp
@@ -162,10 +162,10 @@
 #pragma omp declare target link(x) // expected-error {{'x' must not appear in both clauses 'to' and 'link'}}
 
 void bazz() {}
-#pragma omp declare target to(bazz) device_type(nohost) // omp45-error {{unexpected 'device_type' clause, only 'to' or 'link' clauses expected}}
+#pragma omp declare target to(bazz) device_type(nohost) // omp45-error {{unexpected 'device_type' clause, only 'to' or 'link' clauses expected}} host5-note {{marked as 'device_type(nohost)' here}}
 void bazzz() {bazz();}
 #pragma omp declare target to(bazzz) device_type(nohost) // omp45-error {{unexpected 'device_type' clause, only 'to' or 'link' clauses expected}}
-void any() {bazz();}
+void any() {bazz();} // host5-error {{function with 'device_type(nohost)' is not available on host}}
 void host1() {bazz();}
 #pragma omp declare target to(host1) device_type(host) // omp45-error {{unexpected 'device_type' clause, only 'to' or 'link' clauses expected}} dev5-note 2 {{marked as 'device_type(host)' here}}
 void host2() {bazz();}
Index: test/CodeGenCUDA/openmp-target.cu
===================================================================
--- /dev/null
+++ test/CodeGenCUDA/openmp-target.cu
@@ -0,0 +1,20 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm \
+// RUN:            -fopenmp -fopenmp-version=50 -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm \
+// RUN:            -fopenmp -fopenmp-version=50 -o - -x c++ %s | FileCheck %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device \
+// RUN:            -emit-llvm -o - %s | FileCheck -check-prefixes=DEV %s
+
+// CHECK: declare{{.*}}@_Z7nohost1v()
+// DEV-NOT: _Z7nohost1v
+void nohost1() {}
+#pragma omp declare target to(nohost1) device_type(nohost)
+
+// CHECK: declare{{.*}}@_Z7nohost2v()
+// DEV-NOT: _Z7nohost2v
+void nohost2() {nohost1();}
+#pragma omp declare target to(nohost2) device_type(nohost)
+
Index: lib/Sema/SemaOpenMP.cpp
===================================================================
--- lib/Sema/SemaOpenMP.cpp
+++ lib/Sema/SemaOpenMP.cpp
@@ -1565,34 +1565,11 @@
 };
 } // anonymous namespace
 
-/// Do we know that we will eventually codegen the given function?
-static FunctionEmissionStatus isKnownDeviceEmitted(Sema &S, FunctionDecl *FD) {
-  assert(S.LangOpts.OpenMP && S.LangOpts.OpenMPIsDevice &&
-         "Expected OpenMP device compilation.");
-  // Templates are emitted when they're instantiated.
-  if (FD->isDependentContext())
-    return FunctionEmissionStatus::Discarded;
-
-  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
-      OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl());
-  if (DevTy.hasValue())
-    return (*DevTy == OMPDeclareTargetDeclAttr::DT_Host)
-               ? FunctionEmissionStatus::Discarded
-               : FunctionEmissionStatus::Emitted;
-
-  // Otherwise, the function is known-emitted if it's in our set of
-  // known-emitted functions.
-  return (S.DeviceKnownEmittedFns.count(FD) > 0)
-             ? FunctionEmissionStatus::Emitted
-             : FunctionEmissionStatus::Unknown;
-}
-
 Sema::DeviceDiagBuilder Sema::diagIfOpenMPDeviceCode(SourceLocation Loc,
                                                      unsigned DiagID) {
   assert(LangOpts.OpenMP && LangOpts.OpenMPIsDevice &&
          "Expected OpenMP device compilation.");
-  FunctionEmissionStatus FES =
-      isKnownDeviceEmitted(*this, getCurFunctionDecl());
+  FunctionEmissionStatus FES = getEmissionStatus(getCurFunctionDecl());
   DeviceDiagBuilder::Kind Kind = DeviceDiagBuilder::K_Nop;
   switch (FES) {
   case FunctionEmissionStatus::Emitted:
@@ -1602,42 +1579,23 @@
     Kind = isOpenMPDeviceDelayedContext(*this) ? DeviceDiagBuilder::K_Deferred
                                                : DeviceDiagBuilder::K_Immediate;
     break;
-  case FunctionEmissionStatus::Discarded:
+  case FunctionEmissionStatus::TemplateDiscarded:
+  case FunctionEmissionStatus::OMPDiscarded:
     Kind = DeviceDiagBuilder::K_Nop;
     break;
+  case FunctionEmissionStatus::CUDADiscarded:
+    llvm_unreachable("CUDADiscarded unexpected in OpenMP device compilation");
+    break;
   }
 
   return DeviceDiagBuilder(Kind, Loc, DiagID, getCurFunctionDecl(), *this);
 }
 
-/// Do we know that we will eventually codegen the given function?
-static FunctionEmissionStatus isKnownHostEmitted(Sema &S, FunctionDecl *FD) {
-  assert(S.LangOpts.OpenMP && !S.LangOpts.OpenMPIsDevice &&
-         "Expected OpenMP host compilation.");
-  // In OpenMP 4.5 all the functions are host functions.
-  if (S.LangOpts.OpenMP <= 45)
-    return FunctionEmissionStatus::Emitted;
-
-  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
-      OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl());
-  if (DevTy.hasValue())
-    return (*DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
-               ? FunctionEmissionStatus::Discarded
-               : FunctionEmissionStatus::Emitted;
-
-  // Otherwise, the function is known-emitted if it's in our set of
-  // known-emitted functions.
-  return (S.DeviceKnownEmittedFns.count(FD) > 0)
-             ? FunctionEmissionStatus::Emitted
-             : FunctionEmissionStatus::Unknown;
-}
-
 Sema::DeviceDiagBuilder Sema::diagIfOpenMPHostCode(SourceLocation Loc,
                                                    unsigned DiagID) {
   assert(LangOpts.OpenMP && !LangOpts.OpenMPIsDevice &&
          "Expected OpenMP host compilation.");
-  FunctionEmissionStatus FES =
-      isKnownHostEmitted(*this, getCurFunctionDecl());
+  FunctionEmissionStatus FES = getEmissionStatus(getCurFunctionDecl());
   DeviceDiagBuilder::Kind Kind = DeviceDiagBuilder::K_Nop;
   switch (FES) {
   case FunctionEmissionStatus::Emitted:
@@ -1646,7 +1604,9 @@
   case FunctionEmissionStatus::Unknown:
     Kind = DeviceDiagBuilder::K_Deferred;
     break;
-  case FunctionEmissionStatus::Discarded:
+  case FunctionEmissionStatus::TemplateDiscarded:
+  case FunctionEmissionStatus::OMPDiscarded:
+  case FunctionEmissionStatus::CUDADiscarded:
     Kind = DeviceDiagBuilder::K_Nop;
     break;
   }
@@ -1663,31 +1623,34 @@
   FunctionDecl *Caller = getCurFunctionDecl();
 
   // host only function are not available on the device.
-  if (Caller &&
-      (isKnownDeviceEmitted(*this, Caller) == FunctionEmissionStatus::Emitted ||
-       (!isOpenMPDeviceDelayedContext(*this) &&
-        isKnownDeviceEmitted(*this, Caller) ==
-            FunctionEmissionStatus::Unknown)) &&
-      isKnownDeviceEmitted(*this, Callee) ==
-          FunctionEmissionStatus::Discarded) {
-    StringRef HostDevTy =
-        getOpenMPSimpleClauseTypeName(OMPC_device_type, OMPC_DEVICE_TYPE_host);
-    Diag(Loc, diag::err_omp_wrong_device_function_call) << HostDevTy << 0;
-    Diag(Callee->getAttr<OMPDeclareTargetDeclAttr>()->getLocation(),
-         diag::note_omp_marked_device_type_here)
-        << HostDevTy;
-    return;
+  if (Caller) {
+    auto CallerS = getEmissionStatus(Caller);
+    auto CalleeS = getEmissionStatus(Callee);
+    assert(CallerS != FunctionEmissionStatus::CUDADiscarded &&
+           CalleeS != FunctionEmissionStatus::CUDADiscarded &&
+           "CUDADiscarded unexpected in OpenMP device function check");
+    if ((CallerS == FunctionEmissionStatus::Emitted ||
+         (!isOpenMPDeviceDelayedContext(*this) &&
+          CallerS == FunctionEmissionStatus::Unknown)) &&
+        CalleeS == FunctionEmissionStatus::OMPDiscarded) {
+      StringRef HostDevTy = getOpenMPSimpleClauseTypeName(
+          OMPC_device_type, OMPC_DEVICE_TYPE_host);
+      Diag(Loc, diag::err_omp_wrong_device_function_call) << HostDevTy << 0;
+      Diag(Callee->getAttr<OMPDeclareTargetDeclAttr>()->getLocation(),
+           diag::note_omp_marked_device_type_here)
+          << HostDevTy;
+      return;
+    }
   }
   // If the caller is known-emitted, mark the callee as known-emitted.
   // Otherwise, mark the call in our call graph so we can traverse it later.
   if ((CheckForDelayedContext && !isOpenMPDeviceDelayedContext(*this)) ||
       (!Caller && !CheckForDelayedContext) ||
-      (Caller &&
-       isKnownDeviceEmitted(*this, Caller) == FunctionEmissionStatus::Emitted))
+      (Caller && getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted))
     markKnownEmitted(*this, Caller, Callee, Loc,
                      [CheckForDelayedContext](Sema &S, FunctionDecl *FD) {
                        return CheckForDelayedContext &&
-                              isKnownDeviceEmitted(S, FD) ==
+                              S.getEmissionStatus(FD) ==
                                   FunctionEmissionStatus::Emitted;
                      });
   else if (Caller)
@@ -1703,29 +1666,38 @@
   FunctionDecl *Caller = getCurFunctionDecl();
 
   // device only function are not available on the host.
-  if (Caller &&
-      isKnownHostEmitted(*this, Caller) == FunctionEmissionStatus::Emitted &&
-      isKnownHostEmitted(*this, Callee) == FunctionEmissionStatus::Discarded) {
-    StringRef NoHostDevTy = getOpenMPSimpleClauseTypeName(
-        OMPC_device_type, OMPC_DEVICE_TYPE_nohost);
-    Diag(Loc, diag::err_omp_wrong_device_function_call) << NoHostDevTy << 1;
-    Diag(Callee->getAttr<OMPDeclareTargetDeclAttr>()->getLocation(),
-         diag::note_omp_marked_device_type_here)
-        << NoHostDevTy;
-    return;
+  if (Caller) {
+    auto CallerS = getEmissionStatus(Caller);
+    auto CalleeS = getEmissionStatus(Callee);
+    assert(
+        (LangOpts.CUDA || (CallerS != FunctionEmissionStatus::CUDADiscarded &&
+                           CalleeS != FunctionEmissionStatus::CUDADiscarded)) &&
+        "CUDADiscarded unexpected in OpenMP host function check");
+    if (CallerS == FunctionEmissionStatus::Emitted &&
+        CalleeS == FunctionEmissionStatus::OMPDiscarded) {
+      StringRef NoHostDevTy = getOpenMPSimpleClauseTypeName(
+          OMPC_device_type, OMPC_DEVICE_TYPE_nohost);
+      Diag(Loc, diag::err_omp_wrong_device_function_call) << NoHostDevTy << 1;
+      Diag(Callee->getAttr<OMPDeclareTargetDeclAttr>()->getLocation(),
+           diag::note_omp_marked_device_type_here)
+          << NoHostDevTy;
+      return;
+    }
   }
   // If the caller is known-emitted, mark the callee as known-emitted.
   // Otherwise, mark the call in our call graph so we can traverse it later.
-  if ((!CheckCaller && !Caller) ||
-      (Caller &&
-       isKnownHostEmitted(*this, Caller) == FunctionEmissionStatus::Emitted))
-    markKnownEmitted(
-        *this, Caller, Callee, Loc, [CheckCaller](Sema &S, FunctionDecl *FD) {
-          return CheckCaller &&
-                 isKnownHostEmitted(S, FD) == FunctionEmissionStatus::Emitted;
-        });
-  else if (Caller)
-    DeviceCallGraph[Caller].insert({Callee, Loc});
+  if (!shouldIgnoreInHostDeviceCheck(Callee)) {
+    if ((!CheckCaller && !Caller) ||
+        (Caller &&
+         getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted))
+      markKnownEmitted(
+          *this, Caller, Callee, Loc, [CheckCaller](Sema &S, FunctionDecl *FD) {
+            return CheckCaller &&
+                   S.getEmissionStatus(FD) == FunctionEmissionStatus::Emitted;
+          });
+    else if (Caller)
+      DeviceCallGraph[Caller].insert({Callee, Loc});
+  }
 }
 
 void Sema::checkOpenMPDeviceExpr(const Expr *E) {
Index: lib/Sema/SemaDecl.cpp
===================================================================
--- lib/Sema/SemaDecl.cpp
+++ lib/Sema/SemaDecl.cpp
@@ -17613,3 +17613,87 @@
 Decl *Sema::getObjCDeclContext() const {
   return (dyn_cast_or_null<ObjCContainerDecl>(CurContext));
 }
+
+Sema::FunctionEmissionStatus Sema::getEmissionStatus(FunctionDecl *FD) {
+  // Templates are emitted when they're instantiated.
+  if (FD->isDependentContext())
+    return FunctionEmissionStatus::TemplateDiscarded;
+
+  FunctionEmissionStatus OMPES = FunctionEmissionStatus::Unknown;
+  if (LangOpts.OpenMPIsDevice) {
+    Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
+        OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl());
+    if (DevTy.hasValue()) {
+      if (*DevTy == OMPDeclareTargetDeclAttr::DT_Host)
+        OMPES = FunctionEmissionStatus::OMPDiscarded;
+      else if (DeviceKnownEmittedFns.count(FD) > 0)
+        OMPES = FunctionEmissionStatus::Emitted;
+    }
+  } else if (LangOpts.OpenMP) {
+    // In OpenMP 4.5 all the functions are host functions.
+    if (LangOpts.OpenMP <= 45) {
+      OMPES = FunctionEmissionStatus::Emitted;
+    } else {
+      Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
+          OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl());
+      // In OpenMP 5.0 or above, DevTy may be changed later by
+      // #pragma omp declare target to(*) device_type(*). Therefore DevTy
+      // having no value does not imply host. The emission status will be
+      // checked again at the end of compilation unit.
+      if (DevTy.hasValue()) {
+        if (*DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) {
+          OMPES = FunctionEmissionStatus::OMPDiscarded;
+        } else if (DeviceKnownEmittedFns.count(FD) > 0) {
+          OMPES = FunctionEmissionStatus::Emitted;
+        }
+      }
+    }
+  }
+  if (OMPES == FunctionEmissionStatus::OMPDiscarded ||
+      (OMPES == FunctionEmissionStatus::Emitted && !LangOpts.CUDA))
+    return OMPES;
+
+  if (LangOpts.CUDA) {
+    // When compiling for device, host functions are never emitted.  Similarly,
+    // when compiling for host, device and global functions are never emitted.
+    // (Technically, we do emit a host-side stub for global functions, but this
+    // doesn't count for our purposes here.)
+    Sema::CUDAFunctionTarget T = IdentifyCUDATarget(FD);
+    if (LangOpts.CUDAIsDevice && T == Sema::CFT_Host)
+      return FunctionEmissionStatus::CUDADiscarded;
+    if (!LangOpts.CUDAIsDevice &&
+        (T == Sema::CFT_Device || T == Sema::CFT_Global))
+      return FunctionEmissionStatus::CUDADiscarded;
+
+    // Check whether this function is externally visible -- if so, it's
+    // known-emitted.
+    //
+    // We have to check the GVA linkage of the function's *definition* -- if we
+    // only have a declaration, we don't know whether or not the function will
+    // be emitted, because (say) the definition could include "inline".
+    FunctionDecl *Def = FD->getDefinition();
+
+    if (Def &&
+        !isDiscardableGVALinkage(getASTContext().GetGVALinkageForFunction(Def))
+        && (!LangOpts.OpenMP || OMPES == FunctionEmissionStatus::Emitted))
+      return FunctionEmissionStatus::Emitted;
+  }
+
+  // Otherwise, the function is known-emitted if it's in our set of
+  // known-emitted functions.
+  return (DeviceKnownEmittedFns.count(FD) > 0)
+             ? FunctionEmissionStatus::Emitted
+             : FunctionEmissionStatus::Unknown;
+}
+
+bool Sema::shouldIgnoreInHostDeviceCheck(FunctionDecl *Callee) {
+  // Host-side references to a __global__ function refer to the stub, so the
+  // function itself is never emitted and therefore should not be marked.
+  // If we have host fn calls kernel fn calls host+device, the HD function
+  // does not get instantiated on the host. We model this by omitting at the
+  // call to the kernel from the callgraph. This ensures that, when compiling
+  // for host, only HD functions actually called from the host get marked as
+  // known-emitted.
+  return LangOpts.CUDA && !LangOpts.CUDAIsDevice &&
+         IdentifyCUDATarget(Callee) == CFT_Global;
+}
Index: lib/Sema/SemaCUDA.cpp
===================================================================
--- lib/Sema/SemaCUDA.cpp
+++ lib/Sema/SemaCUDA.cpp
@@ -600,40 +600,6 @@
   NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
 }
 
-// Do we know that we will eventually codegen the given function?
-static bool IsKnownEmitted(Sema &S, FunctionDecl *FD) {
-  // Templates are emitted when they're instantiated.
-  if (FD->isDependentContext())
-    return false;
-
-  // When compiling for device, host functions are never emitted.  Similarly,
-  // when compiling for host, device and global functions are never emitted.
-  // (Technically, we do emit a host-side stub for global functions, but this
-  // doesn't count for our purposes here.)
-  Sema::CUDAFunctionTarget T = S.IdentifyCUDATarget(FD);
-  if (S.getLangOpts().CUDAIsDevice && T == Sema::CFT_Host)
-    return false;
-  if (!S.getLangOpts().CUDAIsDevice &&
-      (T == Sema::CFT_Device || T == Sema::CFT_Global))
-    return false;
-
-  // Check whether this function is externally visible -- if so, it's
-  // known-emitted.
-  //
-  // We have to check the GVA linkage of the function's *definition* -- if we
-  // only have a declaration, we don't know whether or not the function will be
-  // emitted, because (say) the definition could include "inline".
-  FunctionDecl *Def = FD->getDefinition();
-
-  if (Def &&
-      !isDiscardableGVALinkage(S.getASTContext().GetGVALinkageForFunction(Def)))
-    return true;
-
-  // Otherwise, the function is known-emitted if it's in our set of
-  // known-emitted functions.
-  return S.DeviceKnownEmittedFns.count(FD) > 0;
-}
-
 Sema::DeviceDiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
                                                    unsigned DiagID) {
   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
@@ -647,7 +613,8 @@
       // device code if we're compiling for device.  Defer any errors in device
       // mode until the function is known-emitted.
       if (getLangOpts().CUDAIsDevice) {
-        return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext))
+        return (getEmissionStatus(dyn_cast<FunctionDecl>(CurContext)) ==
+                FunctionEmissionStatus::Emitted)
                    ? DeviceDiagBuilder::K_ImmediateWithCallStack
                    : DeviceDiagBuilder::K_Deferred;
       }
@@ -675,7 +642,8 @@
       if (getLangOpts().CUDAIsDevice)
         return DeviceDiagBuilder::K_Nop;
 
-      return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext))
+      return (getEmissionStatus(dyn_cast<FunctionDecl>(CurContext)) ==
+              FunctionEmissionStatus::Emitted)
                  ? DeviceDiagBuilder::K_ImmediateWithCallStack
                  : DeviceDiagBuilder::K_Deferred;
     default:
@@ -702,12 +670,16 @@
 
   // If the caller is known-emitted, mark the callee as known-emitted.
   // Otherwise, mark the call in our call graph so we can traverse it later.
-  bool CallerKnownEmitted = IsKnownEmitted(*this, Caller);
+  bool CallerKnownEmitted =
+      getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted;
   if (CallerKnownEmitted) {
     // Host-side references to a __global__ function refer to the stub, so the
     // function itself is never emitted and therefore should not be marked.
-    if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global)
-      markKnownEmitted(*this, Caller, Callee, Loc, IsKnownEmitted);
+    if (!shouldIgnoreInHostDeviceCheck(Callee))
+      markKnownEmitted(
+          *this, Caller, Callee, Loc, [](Sema &S, FunctionDecl *FD) {
+            return S.getEmissionStatus(FD) == FunctionEmissionStatus::Emitted;
+          });
   } else {
     // If we have
     //   host fn calls kernel fn calls host+device,
@@ -715,7 +687,7 @@
     // omitting at the call to the kernel from the callgraph.  This ensures
     // that, when compiling for host, only HD functions actually called from the
     // host get marked as known-emitted.
-    if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global)
+    if (!shouldIgnoreInHostDeviceCheck(Callee))
       DeviceCallGraph[Caller].insert({Callee, Loc});
   }
 
Index: include/clang/Sema/Sema.h
===================================================================
--- include/clang/Sema/Sema.h
+++ include/clang/Sema/Sema.h
@@ -3458,6 +3458,19 @@
                                                     bool DiagnoseMissing);
   bool isKnownName(StringRef name);
 
+  /// Status of the function emission on the CUDA/HIP/OpenMP host/device attrs.
+  enum class FunctionEmissionStatus {
+    Emitted,
+    CUDADiscarded,     // Discarded due to CUDA/HIP hostness
+    OMPDiscarded,      // Discarded due to OpenMP hostness
+    TemplateDiscarded, // Discarded due to uninstantiated templates
+    Unknown,
+  };
+  FunctionEmissionStatus getEmissionStatus(FunctionDecl *Decl);
+
+  // Whether the callee should be ignored in CUDA/HIP/OpenMP host/device check.
+  bool shouldIgnoreInHostDeviceCheck(FunctionDecl *Callee);
+
   void ArgumentDependentLookup(DeclarationName Name, SourceLocation Loc,
                                ArrayRef<Expr *> Args, ADLResult &Functions);
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to