jdoerfert updated this revision to Diff 254375. jdoerfert added a comment. Herald added a subscriber: yaxunl.
Rewrite. Wrap math.h, time.h, and cmath. Preload only device functions. Passes all 185 math c++11 tests from [0] which do not deal with long double. [0] https://github.com/TApplencourt/OmpVal Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D75788/new/ https://reviews.llvm.org/D75788 Files: clang/lib/Driver/ToolChains/Clang.cpp clang/lib/Headers/CMakeLists.txt clang/lib/Headers/__clang_cuda_cmath.h clang/lib/Headers/__clang_cuda_device_functions.h clang/lib/Headers/__clang_cuda_math.h clang/lib/Headers/__clang_cuda_math_forward_declares.h clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h clang/lib/Headers/openmp_wrappers/__clang_openmp_math.h clang/lib/Headers/openmp_wrappers/__clang_openmp_math_declares.h clang/lib/Headers/openmp_wrappers/cmath clang/lib/Headers/openmp_wrappers/math.h clang/lib/Headers/openmp_wrappers/time.h clang/test/Headers/Inputs/include/climits clang/test/Headers/Inputs/include/cmath clang/test/Headers/Inputs/include/cstdlib clang/test/Headers/Inputs/include/math.h clang/test/Headers/Inputs/include/stdlib.h clang/test/Headers/nvptx_device_cmath_functions.c clang/test/Headers/nvptx_device_cmath_functions.cpp clang/test/Headers/nvptx_device_cmath_functions_cxx17.cpp clang/test/Headers/nvptx_device_math_complex.c clang/test/Headers/nvptx_device_math_functions.c clang/test/Headers/nvptx_device_math_functions.cpp clang/test/Headers/nvptx_device_math_functions_cxx17.cpp clang/test/Headers/nvptx_device_math_macro.cpp clang/test/Headers/nvptx_device_math_modf.cpp clang/test/Headers/nvptx_device_math_sin.c clang/test/Headers/nvptx_device_math_sin.cpp clang/test/Headers/nvptx_device_math_sin_cos.cpp clang/test/Headers/nvptx_device_math_sincos.cpp llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
Index: llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn =================================================================== --- llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn +++ llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn @@ -194,7 +194,7 @@ "openmp_wrappers/math.h", "openmp_wrappers/cmath", "openmp_wrappers/__clang_openmp_math.h", - "openmp_wrappers/__clang_openmp_math_declares.h", + "openmp_wrappers/__clang_openmp_device_functions.h", ] outputs = [ "$clang_resource_dir/include/{{source_target_relative}}" ] } Index: clang/test/Headers/nvptx_device_math_sincos.cpp =================================================================== --- /dev/null +++ clang/test/Headers/nvptx_device_math_sincos.cpp @@ -0,0 +1,58 @@ +// REQUIRES: nvptx-registered-target +// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s + +#include <cmath> + +// 4 calls to sincos(f), all translated to __nv_sincos calls: + +// CHECK-NOT: _Z.sincos +// CHECK: call void @__nv_sincos(double +// CHECK-NOT: _Z.sincos +// CHECK: call void @__nv_sincosf(float +// CHECK-NOT: _Z.sincos +// CHECK: call void @__nv_sincos(double +// CHECK-NOT: _Z.sincos +// CHECK: call void @__nv_sincosf(float +// CHECK-NOT: _Z.sincos + +// single precision wrapper +inline void sincos(float x, float* __restrict__ sin, float* __restrict__ cos) +{ + sincosf(x, sin, cos); +} + +template<typename T> +void test_sincos(T x) +{ + T res_sin, res_cos; + + #pragma omp target map(from: res_sin, res_cos) + { + sincos(x, &res_sin, &res_cos); + } + +} + +int main(int argc, char **argv) +{ + +#if !defined(C_ONLY) + test_sincos<double>(0.0); + test_sincos<float>(0.0); +#endif + + #pragma omp target + { + double s, c; + sincos(0, &s, &c); + } + + #pragma omp target + { + float s, c; + sincosf(0.f, &s, &c); + } + + return 0; +} Index: clang/test/Headers/nvptx_device_math_sin_cos.cpp =================================================================== --- /dev/null +++ clang/test/Headers/nvptx_device_math_sin_cos.cpp @@ -0,0 +1,63 @@ +// REQUIRES: nvptx-registered-target +// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s + +#include <cmath> + +// 6 calls to sin/cos(f), all translated to __nv_sin/__nv_cos calls: + +// CHECK-NOT: _Z.sin +// CHECK-NOT: _Z.cos +// CHECK: call double @__nv_sin(double +// CHECK-NOT: _Z.sin +// CHECK-NOT: _Z.cos +// CHECK: call float @__nv_sinf(float +// CHECK-NOT: _Z.sin +// CHECK-NOT: _Z.cos +// CHECK: call double @__nv_sin(double +// CHECK-NOT: _Z.sin +// CHECK-NOT: _Z.cos +// CHECK: call double @__nv_cos(double +// CHECK-NOT: _Z.sin +// CHECK-NOT: _Z.cos +// CHECK: call float @__nv_sinf(float +// CHECK-NOT: _Z.sin +// CHECK-NOT: _Z.cos +// CHECK: call float @__nv_cosf(float +// CHECK-NOT: _Z.sin +// CHECK-NOT: _Z.cos + +template<typename T> +void test_sin_cos(T x) +{ + T res_sin, res_cos; + + #pragma omp target map(from: res_sin, res_cos) + { + res_sin = std::sin(x); + res_cos = std::cos(x); + } +} + +int main() +{ + +#if !defined(C_ONLY) + test_sin_cos<double>(0.0); + test_sin_cos<float>(0.0); +#endif + + #pragma omp target + { + double res; + res = sin(1.0); + } + + #pragma omp target + { + float res; + res = sinf(1.0f); + } + + return 0; +} Index: clang/test/Headers/nvptx_device_math_sin.cpp =================================================================== --- /dev/null +++ clang/test/Headers/nvptx_device_math_sin.cpp @@ -0,0 +1,27 @@ +// REQUIRES: nvptx-registered-target +// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=SLOW +// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math +// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math | FileCheck %s --check-prefix=FAST +// expected-no-diagnostics + +#include <cmath> + +double math(float f, double d, long double ld) { + double r = 0; +// SLOW: call float @__nv_sinf(float +// FAST: call fast float @__nv_fast_sinf(float + r += sin(f); +// SLOW: call double @__nv_sin(double +// FAST: call fast double @__nv_sin(double + r += sin(d); + return r; +} + +long double foo(float f, double d, long double ld) { + double r = ld; + r += math(f, d, ld); +#pragma omp target map(r) + { r += math(f, d, ld); } + return r; +} Index: clang/test/Headers/nvptx_device_math_sin.c =================================================================== --- /dev/null +++ clang/test/Headers/nvptx_device_math_sin.c @@ -0,0 +1,27 @@ +// REQUIRES: nvptx-registered-target +// RUN: %clang_cc1 -x c -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -x c -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=SLOW +// RUN: %clang_cc1 -x c -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math +// RUN: %clang_cc1 -x c -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math | FileCheck %s --check-prefix=FAST +// expected-no-diagnostics + +#include <math.h> + +double math(float f, double d, long double ld) { + double r = 0; +// SLOW: call float @__nv_sinf(float +// FAST: call fast float @__nv_fast_sinf(float + r += sinf(f); +// SLOW: call double @__nv_sin(double +// FAST: call fast double @__nv_sin(double + r += sin(d); + return r; +} + +long double foo(float f, double d, long double ld) { + double r = ld; + r += math(f, d, ld); +#pragma omp target map(r) + { r += math(f, d, ld); } + return r; +} Index: clang/test/Headers/nvptx_device_math_modf.cpp =================================================================== --- /dev/null +++ clang/test/Headers/nvptx_device_math_modf.cpp @@ -0,0 +1,53 @@ +// REQUIRES: nvptx-registered-target +// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s + +#include <cmath> + +// 4 calls to modf(f), all translated to __nv_modf calls: + +// CHECK-NOT: _Z.modf +// CHECK: call double @__nv_modf(double +// CHECK-NOT: _Z.modf +// CHECK: call float @__nv_modff(float +// CHECK-NOT: _Z.modf +// CHECK: call double @__nv_modf(double +// CHECK-NOT: _Z.modf +// CHECK: call float @__nv_modff(float +// CHECK-NOT: _Z.modf + +template<typename T> +void test_modf(T x) +{ + T dx; + int intx; + + #pragma omp target map(from: intx, dx) + { + T ipart; + dx = std::modf(x, &ipart); + intx = static_cast<int>(ipart); + } +} + +int main() +{ + +#if !defined(C_ONLY) + test_modf<double>(1.0); + test_modf<float>(1.0); +#endif + + #pragma omp target + { + double intpart, res; + res = modf(1.1, &intpart); + } + + #pragma omp target + { + float intpart, res; + res = modff(1.1f, &intpart); + } + +} Index: clang/test/Headers/nvptx_device_math_macro.cpp =================================================================== --- /dev/null +++ clang/test/Headers/nvptx_device_math_macro.cpp @@ -0,0 +1,17 @@ +// REQUIRES: nvptx-registered-target +// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s +// expected-no-diagnostics + +#include <cmath> + +#pragma omp declare target +int use_macro() { + double a(0); +// CHECK-NOT: call +// CHECK: call double @llvm.fabs.f64(double +// CHECK-NOT: call +// CHECK: ret i32 %conv + return (std::fpclassify(a) != FP_ZERO); +} +#pragma omp end declare target Index: clang/test/Headers/nvptx_device_math_functions_cxx17.cpp =================================================================== --- clang/test/Headers/nvptx_device_math_functions_cxx17.cpp +++ clang/test/Headers/nvptx_device_math_functions_cxx17.cpp @@ -3,11 +3,11 @@ // REQUIRES: nvptx-registered-target -// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -include math.h -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -std=c++17 -// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_math_declares.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -include math.h -internal-isystem %S/Inputs/include -include stdlib.h -include limits -include cstdlib -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -std=c++17 -o - | FileCheck -check-prefix CHECK-YES %s +// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -std=c++17 +// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -std=c++17 -o - | FileCheck -check-prefix CHECK-YES %s #include <cstdlib> -#include <math.h> +#include <cmath> void test_sqrt(double a1) { #pragma omp target Index: clang/test/Headers/nvptx_device_math_functions.cpp =================================================================== --- clang/test/Headers/nvptx_device_math_functions.cpp +++ clang/test/Headers/nvptx_device_math_functions.cpp @@ -3,11 +3,11 @@ // REQUIRES: nvptx-registered-target -// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -include math.h -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_math_declares.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -include math.h -internal-isystem %S/Inputs/include -include stdlib.h -include limits -include cstdlib -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefix CHECK-YES %s +// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefix CHECK-YES %s #include <cstdlib> -#include <math.h> +#include <cmath> void test_sqrt(double a1) { #pragma omp target Index: clang/test/Headers/nvptx_device_math_functions.c =================================================================== --- clang/test/Headers/nvptx_device_math_functions.c +++ clang/test/Headers/nvptx_device_math_functions.c @@ -3,23 +3,31 @@ // REQUIRES: nvptx-registered-target -// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -include math.h -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_math_declares.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -include math.h -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefix CHECK-YES %s +// RUN: %clang_cc1 -x c -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -x c -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s +// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -x c++ -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s +#ifdef __cplusplus +#include <cstdlib> +#include <cmath> +#else +#include <stdlib.h> #include <math.h> +#endif void test_sqrt(double a1) { #pragma omp target { - // CHECK-YES: call double @__nv_sqrt(double + // CHECK: call double @__nv_sqrt(double double l1 = sqrt(a1); - // CHECK-YES: call double @__nv_pow(double + // CHECK: call double @__nv_pow(double double l2 = pow(a1, a1); - // CHECK-YES: call double @__nv_modf(double + // CHECK: call double @__nv_modf(double double l3 = modf(a1 + 3.5, &a1); - // CHECK-YES: call double @__nv_fabs(double + // CHECK: call double @__nv_fabs(double double l4 = fabs(a1); - // CHECK-YES: call i32 @__nv_abs(i32 + // CHECK: call i32 @__nv_abs(i32 double l5 = abs((int)a1); } } Index: clang/test/Headers/nvptx_device_math_complex.c =================================================================== --- /dev/null +++ clang/test/Headers/nvptx_device_math_complex.c @@ -0,0 +1,23 @@ +// REQUIRES: nvptx-registered-target +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s +// expected-no-diagnostics + +// CHECK-DAG: call { float, float } @__divsc3( +// CHECK-DAG: call { float, float } @__mulsc3( +void test_scmplx(float _Complex a) { +#pragma omp target + { + (void)(a * (a / a)); + } +} + + +// CHECK-DAG: call { double, double } @__divdc3( +// CHECK-DAG: call { double, double } @__muldc3( +void test_dcmplx(double _Complex a) { +#pragma omp target + { + (void)(a * (a / a)); + } +} Index: clang/test/Headers/nvptx_device_cmath_functions_cxx17.cpp =================================================================== --- clang/test/Headers/nvptx_device_cmath_functions_cxx17.cpp +++ clang/test/Headers/nvptx_device_cmath_functions_cxx17.cpp @@ -3,8 +3,8 @@ // REQUIRES: nvptx-registered-target -// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -include cmath -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -std=c++17 -// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_math_declares.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -include cmath -internal-isystem %S/Inputs/include -include stdlib.h -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -std=c++17 -o - | FileCheck -check-prefix CHECK-YES %s +// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -std=c++17 +// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -std=c++17 -o - | FileCheck -check-prefix CHECK-YES %s #include <cmath> #include <cstdlib> Index: clang/test/Headers/nvptx_device_cmath_functions.cpp =================================================================== --- clang/test/Headers/nvptx_device_cmath_functions.cpp +++ clang/test/Headers/nvptx_device_cmath_functions.cpp @@ -3,8 +3,8 @@ // REQUIRES: nvptx-registered-target -// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -include cmath -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_math_declares.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -include cmath -internal-isystem %S/Inputs/include -include stdlib.h -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefix CHECK-YES %s +// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefix CHECK-YES %s #include <cmath> #include <cstdlib> Index: clang/test/Headers/nvptx_device_cmath_functions.c =================================================================== --- clang/test/Headers/nvptx_device_cmath_functions.c +++ clang/test/Headers/nvptx_device_cmath_functions.c @@ -3,10 +3,11 @@ // REQUIRES: nvptx-registered-target -// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -include cmath -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_math_declares.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -include cmath -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefix CHECK-YES %s +// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefix CHECK-YES %s -#include <cmath> +#include <stdlib.h> +#include <math.h> void test_sqrt(double a1) { #pragma omp target Index: clang/test/Headers/Inputs/include/stdlib.h =================================================================== --- clang/test/Headers/Inputs/include/stdlib.h +++ clang/test/Headers/Inputs/include/stdlib.h @@ -1,2 +1,6 @@ #pragma once typedef __SIZE_TYPE__ size_t; + +#ifndef __cplusplus +extern int abs(int __x) __attribute__((__const__)); +#endif Index: clang/test/Headers/Inputs/include/math.h =================================================================== --- clang/test/Headers/Inputs/include/math.h +++ clang/test/Headers/Inputs/include/math.h @@ -1,5 +1,199 @@ #pragma once -double sqrt(double); -double pow(double, double); -double modf(double, double*); +// __clang_cuda_(c)math(.h) also provide `abs` which actually belong in +// cstdlib. We could split them out but for now we just include cstdlib from +// cmath.h which is what the systems I've seen do as well. +#include <stdlib.h> + +double fabs(double __a); +double acos(double __a); +float acosf(float __a); +double acosh(double __a); +float acoshf(float __a); +double asin(double __a); +float asinf(float __a); +double asinh(double __a); +float asinhf(float __a); +double atan(double __a); +double atan2(double __a, double __b); +float atan2f(float __a, float __b); +float atanf(float __a); +double atanh(double __a); +float atanhf(float __a); +double cbrt(double __a); +float cbrtf(float __a); +double ceil(double __a); +float ceilf(float __a); +double copysign(double __a, double __b); +float copysignf(float __a, float __b); +double cos(double __a); +float cosf(float __a); +double cosh(double __a); +float coshf(float __a); +double cospi(double __a); +float cospif(float __a); +double cyl_bessel_i0(double __a); +float cyl_bessel_i0f(float __a); +double cyl_bessel_i1(double __a); +float cyl_bessel_i1f(float __a); +double erf(double __a); +double erfc(double __a); +float erfcf(float __a); +double erfcinv(double __a); +float erfcinvf(float __a); +double erfcx(double __a); +float erfcxf(float __a); +float erff(float __a); +double erfinv(double __a); +float erfinvf(float __a); +double exp(double __a); +double exp10(double __a); +float exp10f(float __a); +double exp2(double __a); +float exp2f(float __a); +float expf(float __a); +double expm1(double __a); +float expm1f(float __a); +float fabsf(float __a); +double fdim(double __a, double __b); +float fdimf(float __a, float __b); +double fdivide(double __a, double __b); +float fdividef(float __a, float __b); +double floor(double __f); +float floorf(float __f); +double fma(double __a, double __b, double __c); +float fmaf(float __a, float __b, float __c); +double fmax(double __a, double __b); +float fmaxf(float __a, float __b); +double fmin(double __a, double __b); +float fminf(float __a, float __b); +double fmod(double __a, double __b); +float fmodf(float __a, float __b); +double frexp(double __a, int *__b); +float frexpf(float __a, int *__b); +double hypot(double __a, double __b); +float hypotf(float __a, float __b); +int ilogb(double __a); +int ilogbf(float __a); +double j0(double __a); +float j0f(float __a); +double j1(double __a); +float j1f(float __a); +double jn(int __n, double __a); +float jnf(int __n, float __a); +double ldexp(double __a, int __b); +float ldexpf(float __a, int __b); +double lgamma(double __a); +float lgammaf(float __a); +long long llmax(long long __a, long long __b); +long long llmin(long long __a, long long __b); +long long llrint(double __a); +long long llrintf(float __a); +long long llround(double __a); +long long llroundf(float __a); +double log(double __a); +double log10(double __a); +float log10f(float __a); +double log1p(double __a); +float log1pf(float __a); +double log2(double __a); +float log2f(float __a); +double logb(double __a); +float logbf(float __a); +float logf(float __a); +long lrint(double __a); +long lrintf(float __a); +long lround(double __a); +long lroundf(float __a); +int max(int __a, int __b); +int min(int __a, int __b); +double modf(double __a, double *__b); +float modff(float __a, float *__b); +double nearbyint(double __a); +float nearbyintf(float __a); +double nextafter(double __a, double __b); +float nextafterf(float __a, float __b); +double norm(int __dim, const double *__t); +double norm3d(double __a, double __b, double __c); +float norm3df(float __a, float __b, float __c); +double norm4d(double __a, double __b, double __c, double __d); +float norm4df(float __a, float __b, float __c, float __d); +double normcdf(double __a); +float normcdff(float __a); +double normcdfinv(double __a); +float normcdfinvf(float __a); +float normf(int __dim, const float *__t); +double pow(double __a, double __b); +float powf(float __a, float __b); +double powi(double __a, int __b); +float powif(float __a, int __b); +double rcbrt(double __a); +float rcbrtf(float __a); +double remainder(double __a, double __b); +float remainderf(float __a, float __b); +double remquo(double __a, double __b, int *__c); +float remquof(float __a, float __b, int *__c); +double rhypot(double __a, double __b); +float rhypotf(float __a, float __b); +double rint(double __a); +float rintf(float __a); +double rnorm(int __a, const double *__b); +double rnorm3d(double __a, double __b, double __c); +float rnorm3df(float __a, float __b, float __c); +double rnorm4d(double __a, double __b, double __c, double __d); +float rnorm4df(float __a, float __b, float __c, float __d); +float rnormf(int __dim, const float *__t); +double round(double __a); +float roundf(float __a); +double rsqrt(double __a); +float rsqrtf(float __a); +double scalbn(double __a, int __b); +float scalbnf(float __a, int __b); +double scalbln(double __a, long __b); +float scalblnf(float __a, long __b); +double sin(double __a); +void sincos(double __a, double *__s, double *__c); +void sincosf(float __a, float *__s, float *__c); +void sincospi(double __a, double *__s, double *__c); +void sincospif(float __a, float *__s, float *__c); +float sinf(float __a); +double sinh(double __a); +float sinhf(float __a); +double sinpi(double __a); +float sinpif(float __a); +double sqrt(double __a); +float sqrtf(float __a); +double tan(double __a); +float tanf(float __a); +double tanh(double __a); +float tanhf(float __a); +double tgamma(double __a); +float tgammaf(float __a); +double trunc(double __a); +float truncf(float __a); +unsigned long long ullmax(unsigned long long __a, + unsigned long long __b); +unsigned long long ullmin(unsigned long long __a, + unsigned long long __b); +unsigned int umax(unsigned int __a, unsigned int __b); +unsigned int umin(unsigned int __a, unsigned int __b); +double y0(double __a); +float y0f(float __a); +double y1(double __a); +float y1f(float __a); +double yn(int __a, double __b); +float ynf(int __a, float __b); + +/** + * A positive float constant expression. HUGE_VALF evaluates + * to +infinity. Used as an error value returned by the built-in + * math functions. + */ +#define HUGE_VALF (__builtin_huge_valf()) + +/** + * A positive double constant expression. HUGE_VAL evaluates + * to +infinity. Used as an error value returned by the built-in + * math functions. + */ +#define HUGE_VAL (__builtin_huge_val()) Index: clang/test/Headers/Inputs/include/cstdlib =================================================================== --- clang/test/Headers/Inputs/include/cstdlib +++ clang/test/Headers/Inputs/include/cstdlib @@ -1,5 +1,7 @@ #pragma once +#include <stdlib.h> + #if __cplusplus >= 201703L extern int abs (int __x) throw() __attribute__ ((__const__)) ; extern long int labs (long int __x) throw() __attribute__ ((__const__)) ; @@ -20,4 +22,6 @@ inline long long abs(long long __x) { return __builtin_llabs (__x); } + +float fabs(float __x) { return __builtin_fabs(__x); } } Index: clang/test/Headers/Inputs/include/cmath =================================================================== --- clang/test/Headers/Inputs/include/cmath +++ clang/test/Headers/Inputs/include/cmath @@ -1,5 +1,227 @@ #pragma once -double sqrt(double); +// __clang_cuda_(c)math(.h) also provide `abs` which actually belong in +// cstdlib. We could split them out but for now we just include cstdlib from +// cmath.h which is what the systems I've seen do as well. +#include <cstdlib> + +#include <math.h> + +double acos(double); +float acos(float); +double acosh(double); +float acosh(float); +double asin(double); +float asin(float); +double asinh(double); +float asinh(float); +double atan2(double, double); +float atan2(float, float); +double atan(double); +float atan(float); +double atanh(double); +float atanh(float); +double cbrt(double); +float cbrt(float); +double ceil(double); +float ceil(float); +double copysign(double, double); +float copysign(float, float); +double cos(double); +float cos(float); +double cosh(double); +float cosh(float); +double erfc(double); +float erfc(float); +double erf(double); +float erf(float); +double exp2(double); +float exp2(float); +double exp(double); +float exp(float); +double expm1(double); +float expm1(float); +double fdim(double, double); +float fdim(float, float); +double floor(double); +float floor(float); +double fma(double, double, double); +float fma(float, float, float); +double fmax(double, double); +float fmax(float, float); +double fmin(double, double); +float fmin(float, float); +double fmod(double, double); +float fmod(float, float); +int fpclassify(double); +int fpclassify(float); +double frexp(double, int *); +float frexp(float, int *); +double hypot(double, double); +float hypot(float, float); +int ilogb(double); +int ilogb(float); +bool isfinite(long double); +bool isfinite(double); +bool isfinite(float); +bool isgreater(double, double); +bool isgreaterequal(double, double); +bool isgreaterequal(float, float); +bool isgreater(float, float); +bool isinf(long double); +bool isinf(double); +bool isinf(float); +bool isless(double, double); +bool islessequal(double, double); +bool islessequal(float, float); +bool isless(float, float); +bool islessgreater(double, double); +bool islessgreater(float, float); +bool isnan(long double); +bool isnan(double); +bool isnan(float); +bool isnormal(double); +bool isnormal(float); +bool isunordered(double, double); +bool isunordered(float, float); +double ldexp(double, int); +float ldexp(float, int); +double lgamma(double); +float lgamma(float); +long long llrint(double); +long long llrint(float); +double log10(double); +float log10(float); +double log1p(double); +float log1p(float); +double log2(double); +float log2(float); +double logb(double); +float logb(float); +double log(double); +float log(float); +long lrint(double); +long lrint(float); +long lround(double); +long lround(float); +long long llround(float); // No llround(double). +double modf(double, double *); +float modf(float, float *); +double nan(const char *); +float nanf(const char *); +double nearbyint(double); +float nearbyint(float); +double nextafter(double, double); +float nextafter(float, float); double pow(double, double); -double modf(double, double*); +double pow(double, int); +float pow(float, float); +float pow(float, int); +double remainder(double, double); +float remainder(float, float); +double remquo(double, double, int *); +float remquo(float, float, int *); +double rint(double); +float rint(float); +double round(double); +float round(float); +double scalbln(double, long); +float scalbln(float, long); +double scalbn(double, int); +float scalbn(float, int); +bool signbit(double); +bool signbit(float); +long double sin(long double); +double sin(double); +float sin(float); +double sinh(double); +float sinh(float); +double sqrt(double); +float sqrt(float); +double tan(double); +float tan(float); +double tanh(double); +float tanh(float); +double tgamma(double); +float tgamma(float); +double trunc(double); +float trunc(float); + +namespace std { + +using ::acos; +using ::acosh; +using ::asin; +using ::asinh; +using ::atan; +using ::atan2; +using ::atanh; +using ::cbrt; +using ::ceil; +using ::copysign; +using ::cos; +using ::cosh; +using ::erf; +using ::erfc; +using ::exp; +using ::exp2; +using ::expm1; +using ::fdim; +using ::floor; +using ::fma; +using ::fmax; +using ::fmin; +using ::fmod; +using ::fpclassify; +using ::frexp; +using ::hypot; +using ::ilogb; +using ::isfinite; +using ::isgreater; +using ::isgreaterequal; +using ::isinf; +using ::isless; +using ::islessequal; +using ::islessgreater; +using ::isnan; +using ::isnormal; +using ::isunordered; +using ::ldexp; +using ::lgamma; +using ::llrint; +using ::log; +using ::log10; +using ::log1p; +using ::log2; +using ::logb; +using ::lrint; +using ::lround; +using ::llround; +using ::modf; +using ::nan; +using ::nanf; +using ::nearbyint; +using ::nextafter; +using ::pow; +using ::remainder; +using ::remquo; +using ::rint; +using ::round; +using ::scalbln; +using ::scalbn; +using ::signbit; +using ::sin; +using ::sinh; +using ::sqrt; +using ::tan; +using ::tanh; +using ::tgamma; +using ::trunc; + +} // namespace std + +#define FP_NAN 0 +#define FP_INFINITE 1 +#define FP_ZERO 2 +#define FP_SUBNORMAL 3 +#define FP_NORMAL 4 Index: clang/test/Headers/Inputs/include/climits =================================================================== --- /dev/null +++ clang/test/Headers/Inputs/include/climits @@ -0,0 +1,4 @@ +#pragma once + +#define INT_MIN -2147483648 +#define INT_MAX 2147483647 Index: clang/lib/Headers/openmp_wrappers/time.h =================================================================== --- /dev/null +++ clang/lib/Headers/openmp_wrappers/time.h @@ -0,0 +1,31 @@ +/*===---- time.h - OpenMP time header wrapper ------------------------ c ---=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __CLANG_OPENMP_TIME_H__ +#define __CLANG_OPENMP_TIME_H__ + +#ifndef _OPENMP +#error "This file is for OpenMP compilation only." +#endif + +#include_next <time.h> + +#pragma omp begin declare variant match(device={arch(nvptx64)}) +static __attribute__((always_inline)) clock_t int clock() { + return __nvvm_read_ptx_sreg_clock(); +} +#pragma omp end declare variant + +#pragma omp begin declare variant match(device={arch(nvptx)}) +static __attribute__((always_inline)) clock_t int clock() { + return __nvvm_read_ptx_sreg_clock(); +} +#pragma omp end declare variant + +#endif Index: clang/lib/Headers/openmp_wrappers/math.h =================================================================== --- clang/lib/Headers/openmp_wrappers/math.h +++ clang/lib/Headers/openmp_wrappers/math.h @@ -1,4 +1,4 @@ -/*===------------- math.h - Alternative math.h header ----------------------=== +/*===---- openmp_wrapper/math.h -------- OpenMP math.h intercept ------ c++ -=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. @@ -7,11 +7,36 @@ *===-----------------------------------------------------------------------=== */ -#include <__clang_openmp_math.h> +#ifndef __CLANG_OPENMP_MATH_H__ +#define __CLANG_OPENMP_MATH_H__ -#ifndef __CLANG_NO_HOST_MATH__ -#include_next <math.h> -#else -#undef __CLANG_NO_HOST_MATH__ +#ifndef _OPENMP +#error "This file is for OpenMP compilation only." #endif +#include_next <math.h> + +// We need limits.h for __clang_cuda_math.h below and because it should not hurt +// we include it eagerly here. +#include <limits.h> + +#pragma omp begin declare variant match(device={arch(nvptx64)}) +#define __CUDA__ + +#include <__clang_cuda_math.h> + +// TODO: Hack until we support an extension to the match clause that allows "or". +#undef __CLANG_CUDA_MATH_H__ + +#undef __CUDA__ +#pragma omp end declare variant + +#pragma omp begin declare variant match(device={arch(nvptx)}) +#define __CUDA__ + +#include <__clang_cuda_math.h> + +#undef __CUDA__ +#pragma omp end declare variant + +#endif Index: clang/lib/Headers/openmp_wrappers/cmath =================================================================== --- clang/lib/Headers/openmp_wrappers/cmath +++ clang/lib/Headers/openmp_wrappers/cmath @@ -1,4 +1,4 @@ -/*===-------------- cmath - Alternative cmath header -----------------------=== +/*===---- __clang_openmp_device_functions.h - OpenMP math declares ------ c++ -=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. @@ -7,10 +7,36 @@ *===-----------------------------------------------------------------------=== */ -#include <__clang_openmp_math.h> +#ifndef __CLANG_OPENMP_CMATH_H__ +#define __CLANG_OPENMP_CMATH_H__ + +#ifndef _OPENMP +#error "This file is for OpenMP compilation only." +#endif -#ifndef __CLANG_NO_HOST_MATH__ #include_next <cmath> -#else -#undef __CLANG_NO_HOST_MATH__ + +// Make sure we include our math.h overlay, it probably happend already but we +// need to be sure. +#include <math.h> + +#pragma omp begin declare variant match(device={arch(nvptx64)}) +#define __CUDA__ + +#include <__clang_cuda_cmath.h> + +// TODO: Hack until we support an extension to the match clause that allows "or". +#undef __CLANG_CUDA_CMATH_H__ + +#undef __CUDA__ +#pragma omp end declare variant + +#pragma omp begin declare variant match(device={arch(nvptx)}) +#define __CUDA__ + +#include <__clang_cuda_cmath.h> + +#undef __CUDA__ +#pragma omp end declare variant + #endif Index: clang/lib/Headers/openmp_wrappers/__clang_openmp_math_declares.h =================================================================== --- clang/lib/Headers/openmp_wrappers/__clang_openmp_math_declares.h +++ /dev/null @@ -1,33 +0,0 @@ -/*===---- __clang_openmp_math_declares.h - OpenMP math declares ------------=== - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - *===-----------------------------------------------------------------------=== - */ - -#ifndef __CLANG_OPENMP_MATH_DECLARES_H__ -#define __CLANG_OPENMP_MATH_DECLARES_H__ - -#ifndef _OPENMP -#error "This file is for OpenMP compilation only." -#endif - -#if defined(__NVPTX__) && defined(_OPENMP) - -#define __CUDA__ - -#if defined(__cplusplus) - #include <__clang_cuda_math_forward_declares.h> -#endif - -/// Include declarations for libdevice functions. -#include <__clang_cuda_libdevice_declares.h> -/// Provide definitions for these functions. -#include <__clang_cuda_device_functions.h> - -#undef __CUDA__ - -#endif -#endif Index: clang/lib/Headers/openmp_wrappers/__clang_openmp_math.h =================================================================== --- clang/lib/Headers/openmp_wrappers/__clang_openmp_math.h +++ /dev/null @@ -1,35 +0,0 @@ -/*===---- __clang_openmp_math.h - OpenMP target math support ---------------=== - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - *===-----------------------------------------------------------------------=== - */ - -#if defined(__NVPTX__) && defined(_OPENMP) -/// TODO: -/// We are currently reusing the functionality of the Clang-CUDA code path -/// as an alternative to the host declarations provided by math.h and cmath. -/// This is suboptimal. -/// -/// We should instead declare the device functions in a similar way, e.g., -/// through OpenMP 5.0 variants, and afterwards populate the module with the -/// host declarations by unconditionally including the host math.h or cmath, -/// respectively. This is actually what the Clang-CUDA code path does, using -/// __device__ instead of variants to avoid redeclarations and get the desired -/// overload resolution. - -#define __CUDA__ - -#if defined(__cplusplus) - #include <__clang_cuda_cmath.h> -#endif - -#undef __CUDA__ - -/// Magic macro for stopping the math.h/cmath host header from being included. -#define __CLANG_NO_HOST_MATH__ - -#endif - Index: clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h =================================================================== --- /dev/null +++ clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h @@ -0,0 +1,60 @@ +/*===- __clang_openmp_device_functions.h - OpenMP device function declares -=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __CLANG_OPENMP_DEVICE_FUNCTIONS_H__ +#define __CLANG_OPENMP_DEVICE_FUNCTIONS_H__ + +#ifndef _OPENMP +#error "This file is for OpenMP compilation only." +#endif + +#pragma omp begin declare variant match(device={arch(nvptx64)}) +#define __CUDA__ + +#ifdef __cplusplus +extern "C" { +#endif + +/// Include declarations for libdevice functions. +#include <__clang_cuda_libdevice_declares.h> + +/// Provide definitions for these functions. +#include <__clang_cuda_device_functions.h> + +#ifdef __cplusplus +} // extern "C" +#endif + +#undef __CUDA__ +// TODO: Hack until we support an extension to the match clause that allows "or". +#undef __CLANG_CUDA_LIBDEVICE_DECLARES_H__ +#undef __CLANG_CUDA_DEVICE_FUNCTIONS_H__ +#pragma omp end declare variant + +#pragma omp begin declare variant match(device={arch(nvptx)}) +#define __CUDA__ + +#ifdef __cplusplus +extern "C" { +#endif + +/// Include declarations for libdevice functions. +#include <__clang_cuda_libdevice_declares.h> + +/// Provide definitions for these functions. +#include <__clang_cuda_device_functions.h> + +#ifdef __cplusplus +} // extern "C" +#endif + +#undef __CUDA__ +#pragma omp end declare variant + +#endif Index: clang/lib/Headers/__clang_cuda_math_forward_declares.h =================================================================== --- clang/lib/Headers/__clang_cuda_math_forward_declares.h +++ clang/lib/Headers/__clang_cuda_math_forward_declares.h @@ -20,37 +20,14 @@ // would preclude the use of our own __device__ overloads for these functions. #pragma push_macro("__DEVICE__") -#ifdef _OPENMP -#define __DEVICE__ static __inline__ __attribute__((always_inline)) -#else #define __DEVICE__ \ static __inline__ __attribute__((always_inline)) __attribute__((device)) -#endif - -// For C++ 17 we need to include noexcept attribute to be compatible -// with the header-defined version. This may be removed once -// variant is supported. -#if defined(_OPENMP) && defined(__cplusplus) && __cplusplus >= 201703L -#define __NOEXCEPT noexcept -#else -#define __NOEXCEPT -#endif -#if !(defined(_OPENMP) && defined(__cplusplus)) __DEVICE__ long abs(long); __DEVICE__ long long abs(long long); __DEVICE__ double abs(double); __DEVICE__ float abs(float); -#endif -// While providing the CUDA declarations and definitions for math functions, -// we may manually define additional functions. -// TODO: Once variant is supported the additional functions will have -// to be removed. -#if defined(_OPENMP) && defined(__cplusplus) -__DEVICE__ const double abs(const double); -__DEVICE__ const float abs(const float); -#endif -__DEVICE__ int abs(int) __NOEXCEPT; +__DEVICE__ int abs(int); __DEVICE__ double acos(double); __DEVICE__ float acos(float); __DEVICE__ double acosh(double); @@ -85,8 +62,8 @@ __DEVICE__ float exp(float); __DEVICE__ double expm1(double); __DEVICE__ float expm1(float); -__DEVICE__ double fabs(double) __NOEXCEPT; -__DEVICE__ float fabs(float) __NOEXCEPT; +__DEVICE__ double fabs(double); +__DEVICE__ float fabs(float); __DEVICE__ double fdim(double, double); __DEVICE__ float fdim(float, float); __DEVICE__ double floor(double); @@ -136,12 +113,12 @@ __DEVICE__ bool isnormal(float); __DEVICE__ bool isunordered(double, double); __DEVICE__ bool isunordered(float, float); -__DEVICE__ long labs(long) __NOEXCEPT; +__DEVICE__ long labs(long); __DEVICE__ double ldexp(double, int); __DEVICE__ float ldexp(float, int); __DEVICE__ double lgamma(double); __DEVICE__ float lgamma(float); -__DEVICE__ long long llabs(long long) __NOEXCEPT; +__DEVICE__ long long llabs(long long); __DEVICE__ long long llrint(double); __DEVICE__ long long llrint(float); __DEVICE__ double log10(double); @@ -152,9 +129,6 @@ __DEVICE__ float log2(float); __DEVICE__ double logb(double); __DEVICE__ float logb(float); -#if defined(_OPENMP) && defined(__cplusplus) -__DEVICE__ long double log(long double); -#endif __DEVICE__ double log(double); __DEVICE__ float log(float); __DEVICE__ long lrint(double); @@ -302,7 +276,6 @@ } // namespace std #endif -#undef __NOEXCEPT #pragma pop_macro("__DEVICE__") #endif Index: clang/lib/Headers/__clang_cuda_math.h =================================================================== --- clang/lib/Headers/__clang_cuda_math.h +++ clang/lib/Headers/__clang_cuda_math.h @@ -23,8 +23,12 @@ // functions and __forceinline__ helps inlining these wrappers at -O1. #pragma push_macro("__DEVICE__") #ifdef _OPENMP +#if defined(__cplusplus) #define __DEVICE__ static constexpr __attribute__((always_inline, nothrow)) #else +#define __DEVICE__ static __attribute__((always_inline, nothrow)) +#endif +#else #define __DEVICE__ static __device__ __forceinline__ #endif Index: clang/lib/Headers/__clang_cuda_device_functions.h =================================================================== --- clang/lib/Headers/__clang_cuda_device_functions.h +++ clang/lib/Headers/__clang_cuda_device_functions.h @@ -21,7 +21,7 @@ // functions and __forceinline__ helps inlining these wrappers at -O1. #pragma push_macro("__DEVICE__") #ifdef _OPENMP -#define __DEVICE__ static __attribute__((always_inline)) +#define __DEVICE__ static __attribute__((always_inline, nothrow)) #else #define __DEVICE__ static __device__ __forceinline__ #endif @@ -33,7 +33,7 @@ __DEVICE__ unsigned long long __brevll(unsigned long long __a) { return __nv_brevll(__a); } -#if defined(__cplusplus) +#if !defined(_OPENMP) && defined(__cplusplus) __DEVICE__ void __brkpt() { asm volatile("brkpt;"); } __DEVICE__ void __brkpt(int __a) { __brkpt(); } #else @@ -1463,5 +1463,6 @@ return r; } #endif // CUDA_VERSION >= 9020 + #pragma pop_macro("__DEVICE__") #endif // __CLANG_CUDA_DEVICE_FUNCTIONS_H__ Index: clang/lib/Headers/__clang_cuda_cmath.h =================================================================== --- clang/lib/Headers/__clang_cuda_cmath.h +++ clang/lib/Headers/__clang_cuda_cmath.h @@ -31,31 +31,15 @@ // std covers all of the known knowns. #ifdef _OPENMP -#define __DEVICE__ static __attribute__((always_inline)) +#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow)) #else #define __DEVICE__ static __device__ __inline__ __attribute__((always_inline)) #endif -// For C++ 17 we need to include noexcept attribute to be compatible -// with the header-defined version. This may be removed once -// variant is supported. -#if defined(_OPENMP) && defined(__cplusplus) && __cplusplus >= 201703L -#define __NOEXCEPT noexcept -#else -#define __NOEXCEPT -#endif - -#if !(defined(_OPENMP) && defined(__cplusplus)) __DEVICE__ long long abs(long long __n) { return ::llabs(__n); } __DEVICE__ long abs(long __n) { return ::labs(__n); } __DEVICE__ float abs(float __x) { return ::fabsf(__x); } __DEVICE__ double abs(double __x) { return ::fabs(__x); } -#endif -// TODO: remove once variat is supported. -#if defined(_OPENMP) && defined(__cplusplus) -__DEVICE__ const float abs(const float __x) { return ::fabsf((float)__x); } -__DEVICE__ const double abs(const double __x) { return ::fabs((double)__x); } -#endif __DEVICE__ float acos(float __x) { return ::acosf(__x); } __DEVICE__ float acosh(float __x) { return ::acoshf(__x); } __DEVICE__ float asin(float __x) { return ::asinf(__x); } @@ -72,12 +56,10 @@ __DEVICE__ float exp(float __x) { return ::expf(__x); } __DEVICE__ float exp2(float __x) { return ::exp2f(__x); } __DEVICE__ float expm1(float __x) { return ::expm1f(__x); } -__DEVICE__ float fabs(float __x) __NOEXCEPT { return ::fabsf(__x); } +__DEVICE__ float fabs(float __x) { return ::fabsf(__x); } __DEVICE__ float floor(float __x) { return ::floorf(__x); } __DEVICE__ float fdim(float __x, float __y) { return ::fdimf(__x, __y); } __DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); } -// TODO: remove when variant is supported -#ifndef _OPENMP __DEVICE__ int fpclassify(float __x) { return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x); @@ -86,7 +68,6 @@ return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x); } -#endif __DEVICE__ float frexp(float __arg, int *__exp) { return ::frexpf(__arg, __exp); } @@ -197,6 +178,8 @@ // std. These are defined in the CUDA headers in the global namespace, // independent of everything else we've done here. +#ifndef _OPENMP + // We can't use std::enable_if, because we want to be pre-C++11 compatible. But // we go ahead and unconditionally define functions that are only available when // compiling for C++11 to match the behavior of the CUDA headers. @@ -481,10 +464,7 @@ using ::remquof; using ::rintf; using ::roundf; -// TODO: remove once variant is supported -#ifndef _OPENMP using ::scalblnf; -#endif using ::scalbnf; using ::sinf; using ::sinhf; @@ -503,7 +483,8 @@ } // namespace std #endif -#undef __NOEXCEPT +#endif // _OPENMP + #undef __DEVICE__ #endif Index: clang/lib/Headers/CMakeLists.txt =================================================================== --- clang/lib/Headers/CMakeLists.txt +++ clang/lib/Headers/CMakeLists.txt @@ -143,8 +143,7 @@ set(openmp_wrapper_files openmp_wrappers/math.h openmp_wrappers/cmath - openmp_wrappers/__clang_openmp_math.h - openmp_wrappers/__clang_openmp_math_declares.h + openmp_wrappers/__clang_openmp_device_functions.h openmp_wrappers/new ) Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -1216,7 +1216,7 @@ } CmdArgs.push_back("-include"); - CmdArgs.push_back("__clang_openmp_math_declares.h"); + CmdArgs.push_back("__clang_openmp_device_functions.h"); } // Add -i* options, and automatically translate to
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits