Author: Yaxun (Sam) Liu Date: 2023-08-08T17:39:01-04:00 New Revision: ea72a4e6547feaa82e132746c6777b3b69aed0d5
URL: https://github.com/llvm/llvm-project/commit/ea72a4e6547feaa82e132746c6777b3b69aed0d5 DIFF: https://github.com/llvm/llvm-project/commit/ea72a4e6547feaa82e132746c6777b3b69aed0d5.diff LOG: [CUDA][HIP] Fix template argument deduction nvcc allows using std::malloc and std::free in device code. When std::malloc or std::free is passed as a template function argument with template argument deduction, there is no diagnostics. e.g. __global__ void kern() { void *p = std::malloc(1); std::free(p); } int main() { std::shared_ptr<float> a; a = std::shared_ptr<float>( (float*)std::malloc(sizeof(float) * 100), std::free ); return 0; } However, the same code fails to compile with clang (https://godbolt.org/z/1roGvo6YY). The reason is that clang does not have logic to choose a function argument from an overloaded set of candidates based on host/device attributes for template argument deduction. Currently, clang does have a logic to choose a candidate based on the constraints of the candidates. This patch extends that logic to account for the CUDA host/device-based preference. Reviewed by: Artem Belevich Differential Revision: https://reviews.llvm.org/D154300 Added: clang/test/SemaCUDA/template-arg-deduction.cu Modified: clang/lib/Sema/SemaOverload.cpp Removed: ################################################################################ diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 3b14fb6b66e450..5d0299dfa752f9 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -12770,6 +12770,13 @@ Sema::resolveAddressOfSingleOverloadCandidate(Expr *E, DeclAccessPair &Pair) { DeclAccessPair DAP; SmallVector<FunctionDecl *, 2> AmbiguousDecls; + // Return positive for better, negative for worse, 0 for equal preference. + auto CheckCUDAPreference = [&](FunctionDecl *FD1, FunctionDecl *FD2) { + FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true); + return static_cast<int>(IdentifyCUDAPreference(Caller, FD1)) - + static_cast<int>(IdentifyCUDAPreference(Caller, FD2)); + }; + auto CheckMoreConstrained = [&](FunctionDecl *FD1, FunctionDecl *FD2) -> std::optional<bool> { if (FunctionDecl *MF = FD1->getInstantiatedFromMemberFunction()) @@ -12800,9 +12807,31 @@ Sema::resolveAddressOfSingleOverloadCandidate(Expr *E, DeclAccessPair &Pair) { if (!checkAddressOfFunctionIsAvailable(FD)) continue; + // If we found a better result, update Result. + auto FoundBetter = [&]() { + IsResultAmbiguous = false; + DAP = I.getPair(); + Result = FD; + }; + // We have more than one result - see if it is more constrained than the // previous one. if (Result) { + // Check CUDA preference first. If the candidates have diff erennt CUDA + // preference, choose the one with higher CUDA preference. Otherwise, + // choose the one with more constraints. + if (getLangOpts().CUDA) { + int PreferenceByCUDA = CheckCUDAPreference(FD, Result); + // FD has diff erent preference than Result. + if (PreferenceByCUDA != 0) { + // FD is more preferable than Result. + if (PreferenceByCUDA > 0) + FoundBetter(); + continue; + } + } + // FD has the same CUDA prefernece than Result. Continue check + // constraints. std::optional<bool> MoreConstrainedThanPrevious = CheckMoreConstrained(FD, Result); if (!MoreConstrainedThanPrevious) { @@ -12814,9 +12843,7 @@ Sema::resolveAddressOfSingleOverloadCandidate(Expr *E, DeclAccessPair &Pair) { continue; // FD is more constrained - replace Result with it. } - IsResultAmbiguous = false; - DAP = I.getPair(); - Result = FD; + FoundBetter(); } if (IsResultAmbiguous) @@ -12826,9 +12853,15 @@ Sema::resolveAddressOfSingleOverloadCandidate(Expr *E, DeclAccessPair &Pair) { SmallVector<const Expr *, 1> ResultAC; // We skipped over some ambiguous declarations which might be ambiguous with // the selected result. - for (FunctionDecl *Skipped : AmbiguousDecls) + for (FunctionDecl *Skipped : AmbiguousDecls) { + // If skipped candidate has diff erent CUDA preference than the result, + // there is no ambiguity. Otherwise check whether they have diff erent + // constraints. + if (getLangOpts().CUDA && CheckCUDAPreference(Skipped, Result) != 0) + continue; if (!CheckMoreConstrained(Skipped, Result)) return nullptr; + } Pair = DAP; } return Result; diff --git a/clang/test/SemaCUDA/template-arg-deduction.cu b/clang/test/SemaCUDA/template-arg-deduction.cu new file mode 100644 index 00000000000000..22ff34fabdb08f --- /dev/null +++ b/clang/test/SemaCUDA/template-arg-deduction.cu @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s + +// expected-no-diagnostics + +#include "Inputs/cuda.h" + +void foo(); +__device__ void foo(); + +template<class F> +void host_temp(F f); + +template<class F> +__device__ void device_temp(F f); + +void host_caller() { + host_temp(foo); +} + +__global__ void kernel_caller() { + device_temp(foo); +} + +__device__ void device_caller() { + device_temp(foo); +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits