[PATCH] D18380: [CUDA] Implement -fcuda-relaxed-constexpr, and enable it by default.

Justin Lebar via cfe-commits Tue, 22 Mar 2016 15:09:12 -0700

jlebar created this revision.
jlebar added a reviewer: tra.
jlebar added subscribers: rsmith, rnk, cfe-commits.


All constexpr functions are implicitly host+device, except for variadic
functions, which are not allowed on the device side.

As part of this change, we now allow you to overload a host+device
function with another function of the same signature but different
attributes.

A side-effect is that we'd previously accept a decl that had __host__
__device__ plus an unannotated def -- there was no ambiguity, because
you couldn't override the decl with different attrs, but now there is
ambiguity.  This causes us to reject some programs we previously
accepted.

http://reviews.llvm.org/D18380

Files:
  include/clang/Basic/LangOptions.def
  include/clang/Driver/CC1Options.td
  lib/Driver/Tools.cpp
  lib/Frontend/CompilerInvocation.cpp
  lib/Sema/SemaDecl.cpp
  lib/Sema/SemaOverload.cpp
  test/SemaCUDA/relaxed-constexpr.cu

Index: test/SemaCUDA/relaxed-constexpr.cu
===================================================================
--- /dev/null
+++ test/SemaCUDA/relaxed-constexpr.cu
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s -fcuda-target-overloads
+// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s -fcuda-target-overloads -fcuda-is-device
+
+// expected-no-diagnostics
+
+#include "Inputs/cuda.h"
+
+static __device__ void f1();
+constexpr void f1();
+
+__device__ void f2();
+static constexpr void f2();
+
+// Different potential error depending on the order of declaration.
+constexpr void f3();
+static __device__ void f3();
+
+static constexpr void f4();
+__device__ void f4();
+
+// Variadic device functions are not allowed, so this is just treated as
+// host-only.
+constexpr void variadic(const char*, ...);
Index: lib/Sema/SemaOverload.cpp
===================================================================
--- lib/Sema/SemaOverload.cpp
+++ lib/Sema/SemaOverload.cpp
@@ -1126,13 +1126,10 @@
 
     assert((OldTarget != CFT_InvalidTarget) && "Unexpected invalid target.");
 
-    // Don't allow mixing of HD with other kinds. This guarantees that
-    // we have only one viable function with this signature on any
-    // side of CUDA compilation .
-    // __global__ functions can't be overloaded based on attribute
-    // difference because, like HD, they also exist on both sides.
-    if ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) ||
-        (NewTarget == CFT_Global) || (OldTarget == CFT_Global))
+    // Don't allow __global__ functions to be overloaded with other functions,
+    // based solely on their CUDA attributes. This guarantees that we have only
+    // one viable function with this signature on any side of CUDA compilation.
+    if ((NewTarget == CFT_Global) || (OldTarget == CFT_Global))
       return false;
 
     // Allow overloading of functions with same signature, but
Index: lib/Sema/SemaDecl.cpp
===================================================================
--- lib/Sema/SemaDecl.cpp
+++ lib/Sema/SemaDecl.cpp
@@ -8006,6 +8006,15 @@
   // Handle attributes.
   ProcessDeclAttributes(S, NewFD, D);
 
+  // With -fcuda-relaxed-constexpr, constexpr functions are treated as
+  // implicitly __host__ __device__.  Device-side variadic functions are not
+  // allowed, so we just treat those as host-only.
+  if (getLangOpts().CUDA && NewFD->isConstexpr() && !NewFD->isVariadic() &&
+      getLangOpts().CUDARelaxedConstexpr) {
+    NewFD->addAttr(CUDAHostAttr::CreateImplicit(Context));
+    NewFD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
+  }
+
   if (getLangOpts().OpenCL) {
     // OpenCL v1.1 s6.5: Using an address space qualifier in a function return
     // type declaration will generate a compilation error.
Index: lib/Frontend/CompilerInvocation.cpp
===================================================================
--- lib/Frontend/CompilerInvocation.cpp
+++ lib/Frontend/CompilerInvocation.cpp
@@ -1569,6 +1569,9 @@
   if (Args.hasArg(OPT_fcuda_allow_variadic_functions))
     Opts.CUDAAllowVariadicFunctions = 1;
 
+  if (Args.hasArg(OPT_fcuda_relaxed_constexpr))
+    Opts.CUDARelaxedConstexpr = 1;
+
   if (Opts.ObjC1) {
     if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) {
       StringRef value = arg->getValue();
Index: lib/Driver/Tools.cpp
===================================================================
--- lib/Driver/Tools.cpp
+++ lib/Driver/Tools.cpp
@@ -3594,6 +3594,7 @@
     CmdArgs.push_back(Args.MakeArgString(AuxToolChain->getTriple().str()));
     CmdArgs.push_back("-fcuda-target-overloads");
     CmdArgs.push_back("-fcuda-disable-target-call-checks");
+    CmdArgs.push_back("-fcuda-relaxed-constexpr");
   }
 
   if (Triple.isOSWindows() && (Triple.getArch() == llvm::Triple::arm ||
Index: include/clang/Driver/CC1Options.td
===================================================================
--- include/clang/Driver/CC1Options.td
+++ include/clang/Driver/CC1Options.td
@@ -699,6 +699,8 @@
   HelpText<"Enable function overloads based on CUDA target attributes.">;
 def fcuda_allow_variadic_functions : Flag<["-"], "fcuda-allow-variadic-functions">,
   HelpText<"Allow variadic functions in CUDA device code.">;
+def fcuda_relaxed_constexpr : Flag<["-"], "fcuda-relaxed-constexpr">,
+  HelpText<"Treat constexpr functions as __host__ __device__.">;
 
 //===----------------------------------------------------------------------===//
 // OpenMP Options
Index: include/clang/Basic/LangOptions.def
===================================================================
--- include/clang/Basic/LangOptions.def
+++ include/clang/Basic/LangOptions.def
@@ -175,6 +175,7 @@
 LANGOPT(CUDADisableTargetCallChecks, 1, 0, "Disable checks for call targets (host, device, etc.)")
 LANGOPT(CUDATargetOverloads, 1, 0, "Enable function overloads based on CUDA target attributes")
 LANGOPT(CUDAAllowVariadicFunctions, 1, 0, "Allow variadic functions in CUDA device code")
+LANGOPT(CUDARelaxedConstexpr, 1, 0, "Treat constexpr functions as __host__ __device__")
 
 LANGOPT(AssumeSaneOperatorNew , 1, 1, "implicit __attribute__((malloc)) for C++'s new operators")
 LANGOPT(SizedDeallocation , 1, 0, "enable sized deallocation functions")

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D18380: [CUDA] Implement -fcuda-relaxed-constexpr, and enable it by default.

Reply via email to