yaxunl updated this revision to Diff 262858.
yaxunl edited the summary of this revision.
yaxunl added a comment.

fix regression. only treat implicit host device candidate inferior in device 
compilation.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79526/new/

https://reviews.llvm.org/D79526

Files:
  clang/include/clang/Sema/Sema.h
  clang/lib/Sema/SemaCUDA.cpp
  clang/lib/Sema/SemaOverload.cpp
  clang/test/SemaCUDA/function-overload.cu

Index: clang/test/SemaCUDA/function-overload.cu
===================================================================
--- clang/test/SemaCUDA/function-overload.cu
+++ clang/test/SemaCUDA/function-overload.cu
@@ -463,3 +463,43 @@
 void foo() {
   __test<int>();
 }
+
+// Test resolving implicit host device candidate vs wrong-sided candidate.
+// In device compilation, implicit host device caller choose implicit host
+// device candidate and wrong-sided candidate with equal preference.
+namespace ImplicitHostDeviceVsWrongSided {
+inline double callee(double x);
+#pragma clang force_cuda_host_device begin
+inline void callee(int x);
+inline double implicit_hd_caller() {
+  return callee(1.0);
+}
+#pragma clang force_cuda_host_device end
+}
+
+// Test resolving implicit host device candidate vs wrong-sided candidate.
+// In host compilation, implicit host device caller choose implicit host
+// device candidate and same-sided candidate with equal preference.
+namespace ImplicitHostDeviceVsWrongSided2 {
+inline void callee(int x);
+#pragma clang force_cuda_host_device begin
+inline double callee(double x);
+inline double implicit_hd_caller() {
+  return callee(1.0);
+}
+#pragma clang force_cuda_host_device end
+}
+
+// Test resolving explicit host device candidate vs. wrong-sided candidate.
+// Explicit host device caller favors host device candidate against wrong-sided
+// candidate.
+namespace ExplicitHostDeviceVsWrongSided {
+inline double callee(double x);
+inline __host__ __device__ void callee(int x);
+inline __host__ __device__ double explicit_hd_caller() {
+  return callee(1.0);
+#if __CUDA_ARCH__
+  // expected-error@-2 {{cannot initialize return object of type 'double' with an rvalue of type 'void'}}
+#endif
+}
+}
Index: clang/lib/Sema/SemaOverload.cpp
===================================================================
--- clang/lib/Sema/SemaOverload.cpp
+++ clang/lib/Sema/SemaOverload.cpp
@@ -9517,11 +9517,29 @@
   // in global variable initializers once proper context is added.
   if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) {
     if (FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext)) {
-      auto P1 = S.IdentifyCUDAPreference(Caller, Cand1.Function);
-      auto P2 = S.IdentifyCUDAPreference(Caller, Cand2.Function);
+      bool IsCallerImplicitHD = false;
+      bool IsCand1ImplicitHD = false;
+      bool IsCand2ImplicitHD = false;
+      S.IdentifyCUDATarget(Caller, /*IgnoreImplicitHD=*/false,
+                           &IsCallerImplicitHD);
+      auto P1 =
+          S.IdentifyCUDAPreference(Caller, Cand1.Function, &IsCand1ImplicitHD);
+      auto P2 =
+          S.IdentifyCUDAPreference(Caller, Cand2.Function, &IsCand2ImplicitHD);
       assert(P1 != Sema::CFP_Never && P2 != Sema::CFP_Never);
-      auto Cand1Emittable = P1 > Sema::CFP_WrongSide;
-      auto Cand2Emittable = P2 > Sema::CFP_WrongSide;
+      // The implicit HD function may be a function in a system header which
+      // is forced by pragma. In device compilation, if we prefer HD candidates
+      // over wrong-sided candidates, overloading resolution may change, which
+      // may result in non-deferrable diagnostics. As a workaround, we let
+      // implicit HD candidates take equal preference as wrong-sided candidates.
+      // This will preserve the overloading resolution.
+      auto EmitThreshold =
+          (S.getLangOpts().CUDAIsDevice && IsCallerImplicitHD &&
+           (IsCand1ImplicitHD || IsCand2ImplicitHD))
+              ? Sema::CFP_HostDevice
+              : Sema::CFP_WrongSide;
+      auto Cand1Emittable = P1 > EmitThreshold;
+      auto Cand2Emittable = P2 > EmitThreshold;
       if (Cand1Emittable && !Cand2Emittable)
         return true;
       if (!Cand1Emittable && Cand2Emittable)
Index: clang/lib/Sema/SemaCUDA.cpp
===================================================================
--- clang/lib/Sema/SemaCUDA.cpp
+++ clang/lib/Sema/SemaCUDA.cpp
@@ -95,17 +95,25 @@
   return CFT_Host;
 }
 
-template <typename A>
-static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) {
-  return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
-           return isa<A>(Attribute) &&
-                  !(IgnoreImplicitAttr && Attribute->isImplicit());
-         });
+template <typename AttrT>
+static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr,
+                    bool *IsImplicitHDAttr = nullptr) {
+  if (auto *A = D->getAttr<AttrT>()) {
+    if (A->isImplicit()) {
+      if (IsImplicitHDAttr)
+        *IsImplicitHDAttr = true;
+      if (IgnoreImplicitAttr)
+        return false;
+    }
+    return true;
+  }
+  return false;
 }
 
 /// IdentifyCUDATarget - Determine the CUDA compilation target for this function
 Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
-                                                  bool IgnoreImplicitHDAttr) {
+                                                  bool IgnoreImplicitHDAttr,
+                                                  bool *IsImplicitHDAttr) {
   // Code that lives outside a function is run on the host.
   if (D == nullptr)
     return CFT_Host;
@@ -116,15 +124,23 @@
   if (D->hasAttr<CUDAGlobalAttr>())
     return CFT_Global;
 
-  if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
-    if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
+  bool IsImplicitDevAttr = false;
+  bool IsImplicitHostAttr = false;
+  if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr, &IsImplicitDevAttr)) {
+    if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr, &IsImplicitHostAttr)) {
+      assert(IsImplicitDevAttr == IsImplicitHostAttr);
+      if (IsImplicitHDAttr)
+        *IsImplicitHDAttr = IsImplicitDevAttr && IsImplicitHostAttr;
       return CFT_HostDevice;
+    }
     return CFT_Device;
   } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
     return CFT_Host;
   } else if (D->isImplicit() && !IgnoreImplicitHDAttr) {
     // Some implicit declarations (like intrinsic functions) are not marked.
     // Set the most lenient target on them for maximal flexibility.
+    if (IsImplicitHDAttr)
+      *IsImplicitHDAttr = true;
     return CFT_HostDevice;
   }
 
@@ -161,10 +177,12 @@
 
 Sema::CUDAFunctionPreference
 Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
-                             const FunctionDecl *Callee) {
+                             const FunctionDecl *Callee, bool *IsImplicitHD) {
   assert(Callee && "Callee must be valid.");
   CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
-  CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);
+  CUDAFunctionTarget CalleeTarget =
+      IdentifyCUDATarget(Callee,
+                         /*IgnoreImplicitHD=*/false, IsImplicitHD);
 
   // If one of the targets is invalid, the check always fails, no matter what
   // the other target is.
Index: clang/include/clang/Sema/Sema.h
===================================================================
--- clang/include/clang/Sema/Sema.h
+++ clang/include/clang/Sema/Sema.h
@@ -11659,7 +11659,8 @@
   /// Use this rather than examining the function's attributes yourself -- you
   /// will get it wrong.  Returns CFT_Host if D is null.
   CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D,
-                                        bool IgnoreImplicitHDAttr = false);
+                                        bool IgnoreImplicitHDAttr = false,
+                                        bool *IsImplicitHDAttr = nullptr);
   CUDAFunctionTarget IdentifyCUDATarget(const ParsedAttributesView &Attrs);
 
   /// Gets the CUDA target for the current context.
@@ -11686,9 +11687,12 @@
   ///               nullptr in case of global context.
   /// \param Callee target function
   ///
+  /// \param IsImplicitHD callee is an implicit host device function
+  ///
   /// \returns preference value for particular Caller/Callee combination.
   CUDAFunctionPreference IdentifyCUDAPreference(const FunctionDecl *Caller,
-                                                const FunctionDecl *Callee);
+                                                const FunctionDecl *Callee,
+                                                bool *IsImplicitHD = nullptr);
 
   /// Determines whether Caller may invoke Callee, based on their CUDA
   /// host/device attributes.  Returns false if the call is not allowed.
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to