Juan Manuel Martinez =?utf-8?q?Caamaño?Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/[email protected]>


llvmorg-github-actions[bot] wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Juan Manuel Martinez Caamaño (jmmartinez)

<details>
<summary>Changes</summary>

In HIP, `constexpr` functions are treated as both `__host__` and `__device__`.

A new version of the MS STL shipped with the build tools version
14.51.36231 has `constexpr` definitions for some `cmath` functions when the
compiler in use is Clang (this gets worse when C++23 is in use).

These definitions conflict with the `__device__` declarations we provide
in the header wrappers.

There is a workaround for this: We do not mark `constexpr`
functions [_that are defined in a system 
header_](https://github.com/llvm/llvm-project/blob/03127a03860b9d8cb440fe8f51c00647f45eb8be/clang/lib/Sema/SemaCUDA.cpp#L877)
 as
`__host__` and `__device__` if there is a previous `__device__`
 declaration.

By moving `__clang_cuda_math_forward_declares.h` before `&lt;cmath&gt;` is
included we're able to benefit from this behavior.

This fixes error like this one 
https://github.com/ggml-org/llama.cpp/issues/22570
even for recent versions of C++.

This patch replaces https://github.com/llvm/llvm-project/pull/200395

---
Full diff: https://github.com/llvm/llvm-project/pull/201563.diff


2 Files Affected:

- (modified) clang/lib/Headers/__clang_hip_runtime_wrapper.h (+4-1) 
- (added) clang/test/Headers/hip-constexpr-cmath.hip (+70) 


``````````diff
diff --git a/clang/lib/Headers/__clang_hip_runtime_wrapper.h 
b/clang/lib/Headers/__clang_hip_runtime_wrapper.h
index 19ce7a5d2c86b..72494fdda4ec9 100644
--- a/clang/lib/Headers/__clang_hip_runtime_wrapper.h
+++ b/clang/lib/Headers/__clang_hip_runtime_wrapper.h
@@ -111,6 +111,10 @@ __attribute__((weak)) inline __device__ void free(void 
*__ptr) {
 #endif //__cplusplus
 
 #if !defined(__HIPCC_RTC__)
+// We must include the forward declarations before cmath is included.
+// Otherwise `constexpr` functions in <cmath> would be implicitely __host__ 
__device__.
+// Declaring the __device__ verison before allows us to overload them with 
__device__ versions (this behavour is only valid for system headers).
+#include <__clang_cuda_math_forward_declares.h>
 #include <cmath>
 #include <cstdlib>
 #include <stdlib.h>
@@ -144,7 +148,6 @@ typedef __SIZE_TYPE__ size_t;
 #if defined(__HIPCC_RTC__)
 #include <__clang_hip_cmath.h>
 #else
-#include <__clang_cuda_math_forward_declares.h>
 #include <__clang_hip_cmath.h>
 #include <__clang_cuda_complex_builtins.h>
 #include <algorithm>
diff --git a/clang/test/Headers/hip-constexpr-cmath.hip 
b/clang/test/Headers/hip-constexpr-cmath.hip
new file mode 100644
index 0000000000000..8b2c3187bf82c
--- /dev/null
+++ b/clang/test/Headers/hip-constexpr-cmath.hip
@@ -0,0 +1,70 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+//
+// Test with the pre-202604 stl
+// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
+// RUN:   -internal-isystem %S/../../lib/Headers/cuda_wrappers \
+// RUN:   -internal-isystem %t/stl/include \
+// RUN:   -internal-isystem %t/stl/2025/include \
+// RUN:   -internal-isystem %S/Inputs/include \
+// RUN:   -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
+// RUN:   -target-cpu gfx906 -verify %t/main.hip -fcuda-is-device -o /dev/null
+//
+// Test with the 202604 stl
+// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
+// RUN:   -internal-isystem %S/../../lib/Headers/cuda_wrappers \
+// RUN:   -internal-isystem %t/stl/include \
+// RUN:   -internal-isystem %t/stl/2026/include \
+// RUN:   -internal-isystem %S/Inputs/include \
+// RUN:   -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
+// RUN:   -target-cpu gfx906 -verify %t/main.hip -fcuda-is-device -o /dev/null
+
+#--- stl/include/stl_common
+_CLANG_BUILTIN2(isunordered)
+_CLANG_BUILTIN2(isgreater)
+_CLANG_BUILTIN2(isgreaterequal)
+_CLANG_BUILTIN2(isless)
+_CLANG_BUILTIN2(islessequal)
+_CLANG_BUILTIN2(islessgreater)
+
+#define FP_NAN 0
+#define FP_INFINITE 1
+#define FP_ZERO 2
+#define FP_SUBNORMAL 3
+#define FP_NORMAL 4
+#--- stl/2025/include/version
+#define _MSVC_STL_UPDATE 202508L
+#--- stl/2025/include/cmath
+#define _CLANG_BUILTIN2(NAME) \
+    bool NAME(float x, float y) noexcept { \
+        return __builtin_##NAME(x, y); \
+    }
+
+#include <stl_common>
+#--- stl/2026/include/version
+#define _MSVC_STL_UPDATE 202604L
+#--- stl/2026/include/cmath
+#define _CLANG_BUILTIN2(NAME) \
+    bool constexpr NAME(float x, float y) noexcept { \
+        return __builtin_##NAME(x, y); \
+    }
+
+#include <stl_common>
+#--- main.hip
+// expected-no-diagnostics
+
+#include <cmath>
+
+#define TEST_CMP(NAME) \
+    bool __attribute__((device)) test_device_ ## NAME (float x) { \
+        bool b = NAME(x, x); \
+        return b; \
+    }
+
+TEST_CMP(isunordered)
+TEST_CMP(isgreater)
+TEST_CMP(isgreaterequal)
+TEST_CMP(isless)
+TEST_CMP(islessequal)
+TEST_CMP(islessgreater)

``````````

</details>


https://github.com/llvm/llvm-project/pull/201563
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to