Juan Manuel Martinez =?utf-8?q?Caamaño?Message-ID: In-Reply-To: <llvm.org/llvm/llvm-project/pull/[email protected]>
llvmorg-github-actions[bot] wrote: <!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-x86 Author: Juan Manuel Martinez Caamaño (jmmartinez) <details> <summary>Changes</summary> In HIP, `constexpr` functions are treated as both `__host__` and `__device__`. A new version of the MS STL shipped with the build tools version 14.51.36231 has `constexpr` definitions for some `cmath` functions when the compiler in use is Clang (this gets worse when C++23 is in use). These definitions conflict with the `__device__` declarations we provide in the header wrappers. There is a workaround for this: We do not mark `constexpr` functions [_that are defined in a system header_](https://github.com/llvm/llvm-project/blob/03127a03860b9d8cb440fe8f51c00647f45eb8be/clang/lib/Sema/SemaCUDA.cpp#L877) as `__host__` and `__device__` if there is a previous `__device__` declaration. By moving `__clang_cuda_math_forward_declares.h` before `<cmath>` is included we're able to benefit from this behavior. This fixes error like this one https://github.com/ggml-org/llama.cpp/issues/22570 even for recent versions of C++. This patch replaces https://github.com/llvm/llvm-project/pull/200395 --- Full diff: https://github.com/llvm/llvm-project/pull/201563.diff 2 Files Affected: - (modified) clang/lib/Headers/__clang_hip_runtime_wrapper.h (+4-1) - (added) clang/test/Headers/hip-constexpr-cmath.hip (+70) ``````````diff diff --git a/clang/lib/Headers/__clang_hip_runtime_wrapper.h b/clang/lib/Headers/__clang_hip_runtime_wrapper.h index 19ce7a5d2c86b..72494fdda4ec9 100644 --- a/clang/lib/Headers/__clang_hip_runtime_wrapper.h +++ b/clang/lib/Headers/__clang_hip_runtime_wrapper.h @@ -111,6 +111,10 @@ __attribute__((weak)) inline __device__ void free(void *__ptr) { #endif //__cplusplus #if !defined(__HIPCC_RTC__) +// We must include the forward declarations before cmath is included. +// Otherwise `constexpr` functions in <cmath> would be implicitely __host__ __device__. +// Declaring the __device__ verison before allows us to overload them with __device__ versions (this behavour is only valid for system headers). +#include <__clang_cuda_math_forward_declares.h> #include <cmath> #include <cstdlib> #include <stdlib.h> @@ -144,7 +148,6 @@ typedef __SIZE_TYPE__ size_t; #if defined(__HIPCC_RTC__) #include <__clang_hip_cmath.h> #else -#include <__clang_cuda_math_forward_declares.h> #include <__clang_hip_cmath.h> #include <__clang_cuda_complex_builtins.h> #include <algorithm> diff --git a/clang/test/Headers/hip-constexpr-cmath.hip b/clang/test/Headers/hip-constexpr-cmath.hip new file mode 100644 index 0000000000000..8b2c3187bf82c --- /dev/null +++ b/clang/test/Headers/hip-constexpr-cmath.hip @@ -0,0 +1,70 @@ +// REQUIRES: amdgpu-registered-target +// RUN: rm -rf %t +// RUN: split-file %s %t +// +// Test with the pre-202604 stl +// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \ +// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \ +// RUN: -internal-isystem %t/stl/include \ +// RUN: -internal-isystem %t/stl/2025/include \ +// RUN: -internal-isystem %S/Inputs/include \ +// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \ +// RUN: -target-cpu gfx906 -verify %t/main.hip -fcuda-is-device -o /dev/null +// +// Test with the 202604 stl +// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \ +// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \ +// RUN: -internal-isystem %t/stl/include \ +// RUN: -internal-isystem %t/stl/2026/include \ +// RUN: -internal-isystem %S/Inputs/include \ +// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \ +// RUN: -target-cpu gfx906 -verify %t/main.hip -fcuda-is-device -o /dev/null + +#--- stl/include/stl_common +_CLANG_BUILTIN2(isunordered) +_CLANG_BUILTIN2(isgreater) +_CLANG_BUILTIN2(isgreaterequal) +_CLANG_BUILTIN2(isless) +_CLANG_BUILTIN2(islessequal) +_CLANG_BUILTIN2(islessgreater) + +#define FP_NAN 0 +#define FP_INFINITE 1 +#define FP_ZERO 2 +#define FP_SUBNORMAL 3 +#define FP_NORMAL 4 +#--- stl/2025/include/version +#define _MSVC_STL_UPDATE 202508L +#--- stl/2025/include/cmath +#define _CLANG_BUILTIN2(NAME) \ + bool NAME(float x, float y) noexcept { \ + return __builtin_##NAME(x, y); \ + } + +#include <stl_common> +#--- stl/2026/include/version +#define _MSVC_STL_UPDATE 202604L +#--- stl/2026/include/cmath +#define _CLANG_BUILTIN2(NAME) \ + bool constexpr NAME(float x, float y) noexcept { \ + return __builtin_##NAME(x, y); \ + } + +#include <stl_common> +#--- main.hip +// expected-no-diagnostics + +#include <cmath> + +#define TEST_CMP(NAME) \ + bool __attribute__((device)) test_device_ ## NAME (float x) { \ + bool b = NAME(x, x); \ + return b; \ + } + +TEST_CMP(isunordered) +TEST_CMP(isgreater) +TEST_CMP(isgreaterequal) +TEST_CMP(isless) +TEST_CMP(islessequal) +TEST_CMP(islessgreater) `````````` </details> https://github.com/llvm/llvm-project/pull/201563 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
