[PATCH] D77238: [CUDA][NFC] Split math.h functions out of __clang_cuda_device_functions.h

2020-04-02 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

In D77238#1956547, @hliao wrote:

> This patch breaks the test
> clang/test/Headers/nvptx_device_cmath_functions.cpp; could you please fix it
> ASAP?
> Once the NVPTX target is enabled, the following tests fail:
>
> Failing Tests (6):
>
>   Clang :: Headers/nvptx_device_cmath_functions.c
>   Clang :: Headers/nvptx_device_cmath_functions.cpp
>   Clang :: Headers/nvptx_device_cmath_functions_cxx17.cpp
>   Clang :: Headers/nvptx_device_math_functions.c
>   Clang :: Headers/nvptx_device_math_functions.cpp
>   Clang :: Headers/nvptx_device_math_functions_cxx17.cpp


Aware, should be fixed by b0b5f0416be60152ddc8d606b1720daba0005518.
Apologies for the noise.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77238/new/

https://reviews.llvm.org/D77238





[PATCH] D77238: [CUDA][NFC] Split math.h functions out of __clang_cuda_device_functions.h

2020-04-01 Thread Johannes Doerfert via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGd1705c1196fe: [CUDA][NFC] Split math.h functions out of 
__clang_cuda_device_functions.h (authored by jdoerfert).

Changed prior to commit:
  https://reviews.llvm.org/D77238?vs=254289&id=254426#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77238/new/

https://reviews.llvm.org/D77238

Files:
  clang/lib/Headers/CMakeLists.txt
  clang/lib/Headers/__clang_cuda_device_functions.h
  clang/lib/Headers/__clang_cuda_math.h
  clang/lib/Headers/__clang_cuda_runtime_wrapper.h

Index: clang/lib/Headers/__clang_cuda_runtime_wrapper.h
===================================================================
--- clang/lib/Headers/__clang_cuda_runtime_wrapper.h
+++ clang/lib/Headers/__clang_cuda_runtime_wrapper.h
@@ -143,11 +143,12 @@
 // to provide our own.
 #include <__clang_cuda_libdevice_declares.h>
 
-// Wrappers for many device-side standard library functions became compiler
-// builtins in CUDA-9 and have been removed from the CUDA headers. Clang now
-// provides its own implementation of the wrappers.
+// Wrappers for many device-side standard library functions, incl. math
+// functions, became compiler builtins in CUDA-9 and have been removed from the
+// CUDA headers. Clang now provides its own implementation of the wrappers.
 #if CUDA_VERSION >= 9000
 #include <__clang_cuda_device_functions.h>
+#include <__clang_cuda_math.h>
 #endif
 
 // __THROW is redefined to be empty by device_functions_decls.h in CUDA. Clang's
Index: clang/lib/Headers/__clang_cuda_math.h
===================================================================
--- /dev/null
+++ clang/lib/Headers/__clang_cuda_math.h
@@ -0,0 +1,347 @@
+/*=== __clang_cuda_math.h - Device-side CUDA math support --===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===---===
+ */
+#ifndef __CLANG_CUDA_MATH_H__
+#define __CLANG_CUDA_MATH_H__
+#ifndef __CUDA__
+#error "This file is for CUDA compilation only."
+#endif
+
+#ifndef _OPENMP
+#if CUDA_VERSION < 9000
+#error This file is intended to be used with CUDA-9+ only.
+#endif
+#endif
+
+// __DEVICE__ is a helper macro with a common set of attributes for the wrappers
+// we implement in this file. We need static in order to avoid emitting unused
+// functions, and __forceinline__ helps inline these wrappers at -O1.
+#pragma push_macro("__DEVICE__")
+#ifdef _OPENMP
+#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))
+#else
+#define __DEVICE__ static __device__ __forceinline__
+#endif
+
+// Specialized version of __DEVICE__ for functions with void return type. Needed
+// because the OpenMP overlay requires constexpr functions here, but prior to
+// C++14 void-returning functions could not be constexpr.
+#pragma push_macro("__DEVICE_VOID__")
+#ifdef _OPENMP
+#if defined(__cplusplus) && __cplusplus >= 201402L
+#define __DEVICE_VOID__ static constexpr __attribute__((always_inline, nothrow))
+#else
+#define __DEVICE_VOID__ static __attribute__((always_inline, nothrow))
+#endif
+#else
+#define __DEVICE_VOID__ __DEVICE__
+#endif
+
+// libdevice provides fast low-precision and slow full-precision implementations
+// for some functions. Which one gets selected depends on
+// __CLANG_CUDA_APPROX_TRANSCENDENTALS__, which gets defined by clang if
+// -ffast-math or -fcuda-approx-transcendentals are in effect.
+#pragma push_macro("__FAST_OR_SLOW")
+#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)
+#define __FAST_OR_SLOW(fast, slow) fast
+#else
+#define __FAST_OR_SLOW(fast, slow) slow
+#endif
+
+__DEVICE__ int abs(int __a) { return __nv_abs(__a); }
+__DEVICE__ double fabs(double __a) { return __nv_fabs(__a); }
+__DEVICE__ double acos(double __a) { return __nv_acos(__a); }
+__DEVICE__ float acosf(float __a) { return __nv_acosf(__a); }
+__DEVICE__ double acosh(double __a) { return __nv_acosh(__a); }
+__DEVICE__ float acoshf(float __a) { return __nv_acoshf(__a); }
+__DEVICE__ double asin(double __a) { return __nv_asin(__a); }
+__DEVICE__ float asinf(float __a) { return __nv_asinf(__a); }
+__DEVICE__ double asinh(double __a) { return __nv_asinh(__a); }
+__DEVICE__ float asinhf(float __a) { return __nv_asinhf(__a); }
+__DEVICE__ double atan(double __a) { return __nv_atan(__a); }
+__DEVICE__ double atan2(double __a, double __b) { return __nv_atan2(__a, __b); }
+__DEVICE__ float atan2f(float __a, float __b) { return __nv_atan2f(__a, __b); }
+__DEVICE__ float atanf(float __a) { return __nv_atanf(__a); }
+__DEVICE__ double atanh(double __a) { return __nv_atanh(__a); }
+__DEVICE__ float atanhf(float __a) { return __nv_atanhf(__a); }
+__DEVICE__ double cbrt(double __a) { return __nv_cbrt(__a); }
+__DEVICE__ float cbrtf(float __a) { return __nv_cbrtf(__a); }

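Further down in this header, past the point where the hunk is truncated above,
these macros are applied at each wrapper. A minimal sketch of the call-site
pattern, assuming the __nv_fast_cosf, __nv_cosf, __nv_fast_sincosf, and
__nv_sincosf libdevice entry points (declared in
__clang_cuda_libdevice_declares.h, which is not shown here):

  /* __FAST_OR_SLOW picks the fast, low-precision libdevice variant when
     -ffast-math or -fcuda-approx-transcendentals is in effect. */
  __DEVICE__ float cosf(float __a) {
    return __FAST_OR_SLOW(__nv_fast_cosf, __nv_cosf)(__a);
  }
  /* __DEVICE_VOID__ covers void-returning wrappers such as sincosf, which
     could not be constexpr prior to C++14. */
  __DEVICE_VOID__ void sincosf(float __a, float *__s, float *__c) {
    return __FAST_OR_SLOW(__nv_fast_sincosf, __nv_sincosf)(__a, __s, __c);
  }
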
[PATCH] D77238: [CUDA][NFC] Split math.h functions out of __clang_cuda_device_functions.h

2020-04-01 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

This patch breaks the test clang/test/Headers/nvptx_device_cmath_functions.cpp;
could you please fix it ASAP?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77238/new/

https://reviews.llvm.org/D77238





[PATCH] D77238: [CUDA][NFC] Split math.h functions out of __clang_cuda_device_functions.h

2020-04-01 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert marked an inline comment as done.
jdoerfert added inline comments.



Comment at: clang/lib/Headers/__clang_cuda_math.h:81
+#endif
+__DEVICE__ long long clock64() { return __nvvm_read_ptx_sreg_clock64(); }
+__DEVICE__ double copysign(double __a, double __b) {

I accidentally moved the clock functions and added the OpenMP ifdef we'll need
later. I'll move the clock functions back before I commit.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77238/new/

https://reviews.llvm.org/D77238





[PATCH] D77238: [CUDA][NFC] Split math.h functions out of __clang_cuda_device_functions.h

2020-04-01 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert created this revision.
jdoerfert added a reviewer: tra.
Herald added subscribers: bollu, yaxunl, mgorny.
Herald added a project: clang.
jdoerfert added a child revision: D77239: [CUDA][NFCI] Use unqualified lookup 
for math functions.

This is not supposed to change anything, but allows us to reuse the math
functions separately from the device functions, e.g., source them at
different times. This will be used by the OpenMP overlay.

This also adds two `return` keywords that were missing.
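As an illustrative sketch of the reuse this enables (the OpenMP overlay
mechanism itself is not part of this patch), a hypothetical consumer could
source only the math wrappers, independently of the device functions:

  /* Hypothetical overlay header: pulls in only the math wrappers, without
     __clang_cuda_device_functions.h. */
  #include <__clang_cuda_math.h>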


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D77238

Files:
  clang/lib/Headers/CMakeLists.txt
  clang/lib/Headers/__clang_cuda_device_functions.h
  clang/lib/Headers/__clang_cuda_math.h
  clang/lib/Headers/__clang_cuda_runtime_wrapper.h

Index: clang/lib/Headers/__clang_cuda_runtime_wrapper.h
===================================================================
--- clang/lib/Headers/__clang_cuda_runtime_wrapper.h
+++ clang/lib/Headers/__clang_cuda_runtime_wrapper.h
@@ -141,11 +141,12 @@
 // to provide our own.
 #include <__clang_cuda_libdevice_declares.h>
 
-// Wrappers for many device-side standard library functions became compiler
-// builtins in CUDA-9 and have been removed from the CUDA headers. Clang now
-// provides its own implementation of the wrappers.
+// Wrappers for many device-side standard library functions, incl. math
+// functions, became compiler builtins in CUDA-9 and have been removed from the
+// CUDA headers. Clang now provides its own implementation of the wrappers.
 #if CUDA_VERSION >= 9000
 #include <__clang_cuda_device_functions.h>
+#include <__clang_cuda_math.h>
 #endif
 
 // __THROW is redefined to be empty by device_functions_decls.h in CUDA. Clang's
Index: clang/lib/Headers/__clang_cuda_math.h
===================================================================
--- /dev/null
+++ clang/lib/Headers/__clang_cuda_math.h
@@ -0,0 +1,363 @@
+/*=== __clang_cuda_math.h - Device-side CUDA math support --===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===---===
+ */
+#ifndef __CLANG_CUDA_MATH_H__
+#define __CLANG_CUDA_MATH_H__
+#ifndef __CUDA__
+#error "This file is for CUDA compilation only."
+#endif
+
+#ifndef _OPENMP
+#if CUDA_VERSION < 9000
+#error This file is intended to be used with CUDA-9+ only.
+#endif
+#endif
+
+// __DEVICE__ is a helper macro with a common set of attributes for the wrappers
+// we implement in this file. We need static in order to avoid emitting unused
+// functions, and __forceinline__ helps inline these wrappers at -O1.
+#pragma push_macro("__DEVICE__")
+#ifdef _OPENMP
+#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))
+#else
+#define __DEVICE__ static __device__ __forceinline__
+#endif
+
+// Specialized version of __DEVICE__ for functions with void return type. Needed
+// because the OpenMP overlay requires constexpr functions here, but prior to
+// C++14 void-returning functions could not be constexpr.
+#pragma push_macro("__DEVICE_VOID__")
+#ifdef _OPENMP
+#if defined(__cplusplus) && __cplusplus >= 201402L
+#define __DEVICE_VOID__ static constexpr __attribute__((always_inline, nothrow))
+#else
+#define __DEVICE_VOID__ static __attribute__((always_inline, nothrow))
+#endif
+#else
+#define __DEVICE_VOID__ __DEVICE__
+#endif
+
+// libdevice provides fast low-precision and slow full-precision implementations
+// for some functions. Which one gets selected depends on
+// __CLANG_CUDA_APPROX_TRANSCENDENTALS__, which gets defined by clang if
+// -ffast-math or -fcuda-approx-transcendentals are in effect.
+#pragma push_macro("__FAST_OR_SLOW")
+#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)
+#define __FAST_OR_SLOW(fast, slow) fast
+#else
+#define __FAST_OR_SLOW(fast, slow) slow
+#endif
+
+__DEVICE__ int abs(int __a) { return __nv_abs(__a); }
+__DEVICE__ double fabs(double __a) { return __nv_fabs(__a); }
+__DEVICE__ double acos(double __a) { return __nv_acos(__a); }
+__DEVICE__ float acosf(float __a) { return __nv_acosf(__a); }
+__DEVICE__ double acosh(double __a) { return __nv_acosh(__a); }
+__DEVICE__ float acoshf(float __a) { return __nv_acoshf(__a); }
+__DEVICE__ double asin(double __a) { return __nv_asin(__a); }
+__DEVICE__ float asinf(float __a) { return __nv_asinf(__a); }
+__DEVICE__ double asinh(double __a) { return __nv_asinh(__a); }
+__DEVICE__ float asinhf(float __a) { return __nv_asinhf(__a); }
+__DEVICE__ double atan(double __a) { return __nv_atan(__a); }
+__DEVICE__ double atan2(double __a, double __b) { return __nv_atan2(__a, __b); }
+__DEVICE__ float atan2f(float __a, float __b) { return __nv_atan2f(__a, __b); }
+__DEVICE__ float atanf(float __a) { return __nv_atanf(__a); }
+__DEVICE__ double atanh(double __a) { return __nv_atanh(__a); }
+__DEVICE__ float atanhf(float __a) { return __nv_atanhf(__a); }