Author: Ethan Stewart Date: 2021-06-23T15:26:09+01:00 New Revision: 5dfdc1812d9b9c043204d39318f6446424d8f2d7
URL: https://github.com/llvm/llvm-project/commit/5dfdc1812d9b9c043204d39318f6446424d8f2d7 DIFF: https://github.com/llvm/llvm-project/commit/5dfdc1812d9b9c043204d39318f6446424d8f2d7.diff LOG: [OpenMP][AMDGCN] Apply fix for isnan, isinf and isfinite for amdgcn. This fixes issues with various return types(bool/int) and was already in place for nvptx headers, adjusted to work for amdgcn. This does not affect hip as the change is guarded with OPENMP_AMDGCN. Similar to D85879. Reviewed By: jdoerfert, JonChesterfield, yaxunl Differential Revision: https://reviews.llvm.org/D104677 Added: Modified: clang/lib/Headers/__clang_hip_cmath.h clang/test/Headers/hip-header.hip clang/test/Headers/openmp_device_math_isnan.cpp Removed: ################################################################################ diff --git a/clang/lib/Headers/__clang_hip_cmath.h b/clang/lib/Headers/__clang_hip_cmath.h index b5d7c16ac5e41..7342705434e6b 100644 --- a/clang/lib/Headers/__clang_hip_cmath.h +++ b/clang/lib/Headers/__clang_hip_cmath.h @@ -52,8 +52,46 @@ __DEVICE__ int fpclassify(double __x) { __DEVICE__ float frexp(float __arg, int *__exp) { return ::frexpf(__arg, __exp); } + +#if defined(__OPENMP_AMDGCN__) +// For OpenMP we work around some old system headers that have non-conforming +// `isinf(float)` and `isnan(float)` implementations that return an `int`. We do +// this by providing two versions of these functions, diff ering only in the +// return type. To avoid conflicting definitions we disable implicit base +// function generation. That means we will end up with two specializations, one +// per type, but only one has a base function defined by the system header. +#pragma omp begin declare variant match( \ + implementation = {extension(disable_implicit_base)}) + +// FIXME: We lack an extension to customize the mangling of the variants, e.g., +// add a suffix. This means we would clash with the names of the variants +// (note that we do not create implicit base functions here). To avoid +// this clash we add a new trait to some of them that is always true +// (this is LLVM after all ;)). It will only influence the mangled name +// of the variants inside the inner region and avoid the clash. +#pragma omp begin declare variant match(implementation = {vendor(llvm)}) + +__DEVICE__ int isinf(float __x) { return ::__isinff(__x); } +__DEVICE__ int isinf(double __x) { return ::__isinf(__x); } +__DEVICE__ int isfinite(float __x) { return ::__finitef(__x); } +__DEVICE__ int isfinite(double __x) { return ::__finite(__x); } +__DEVICE__ int isnan(float __x) { return ::__isnanf(__x); } +__DEVICE__ int isnan(double __x) { return ::__isnan(__x); } + +#pragma omp end declare variant +#endif // defined(__OPENMP_AMDGCN__) + +__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); } +__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); } __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); } __DEVICE__ bool isfinite(double __x) { return ::__finite(__x); } +__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); } +__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); } + +#if defined(__OPENMP_AMDGCN__) +#pragma omp end declare variant +#endif // defined(__OPENMP_AMDGCN__) + __DEVICE__ bool isgreater(float __x, float __y) { return __builtin_isgreater(__x, __y); } @@ -66,8 +104,6 @@ __DEVICE__ bool isgreaterequal(float __x, float __y) { __DEVICE__ bool isgreaterequal(double __x, double __y) { return __builtin_isgreaterequal(__x, __y); } -__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); } -__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); } __DEVICE__ bool isless(float __x, float __y) { return __builtin_isless(__x, __y); } @@ -86,8 +122,6 @@ __DEVICE__ bool islessgreater(float __x, float __y) { __DEVICE__ bool islessgreater(double __x, double __y) { return __builtin_islessgreater(__x, __y); } -__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); } -__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); } __DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); } __DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); } __DEVICE__ bool isunordered(float __x, float __y) { diff --git a/clang/test/Headers/hip-header.hip b/clang/test/Headers/hip-header.hip index 5bba1de2ce6c4..0e95d58d55700 100644 --- a/clang/test/Headers/hip-header.hip +++ b/clang/test/Headers/hip-header.hip @@ -8,6 +8,20 @@ // RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \ // RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \ // RUN: -internal-isystem %S/Inputs/include \ +// RUN: -include cmath \ +// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \ +// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \ +// RUN: -D__HIPCC_RTC__ | FileCheck %s -check-prefixes=AMD_BOOL_RETURN +// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \ +// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \ +// RUN: -internal-isystem %S/Inputs/include \ +// RUN: -include cmath \ +// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \ +// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \ +// RUN: -D__HIPCC_RTC__ -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s -check-prefixes=AMD_INT_RETURN +// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \ +// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \ +// RUN: -internal-isystem %S/Inputs/include \ // RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \ // RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \ // RUN: -D__HIPCC_RTC__ -std=c++14 | FileCheck -check-prefixes=CHECK,CXX14 %s @@ -84,3 +98,20 @@ __device__ float test_floor() { __device__ float test_max() { return max(5, 6.0); } + +// CHECK-LABEL: define{{.*}}@_Z10test_isnanv +__device__ double test_isnan() { + double r = 0; + double d = 5.0; + float f = 5.0; + + // AMD_INT_RETURN: call i32 @__ocml_isnan_f32(float + // AMD_BOOL_RETURN: call i32 @__ocml_isnan_f32(float + r += isnan(f); + + // AMD_INT_RETURN: call i32 @__ocml_isnan_f64(double + // AMD_BOOL_RETURN: call i32 @__ocml_isnan_f64(double + r += isnan(d); + + return r ; +} diff --git a/clang/test/Headers/openmp_device_math_isnan.cpp b/clang/test/Headers/openmp_device_math_isnan.cpp index 35443dbdebea6..7a75e4250c951 100644 --- a/clang/test/Headers/openmp_device_math_isnan.cpp +++ b/clang/test/Headers/openmp_device_math_isnan.cpp @@ -1,11 +1,19 @@ // RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc // RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=BOOL_RETURN +// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple amdgcn-amd-amdhsa -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=AMD_BOOL_RETURN // RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math -ffp-contract=fast +// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math -ffp-contract=fast // RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast | FileCheck %s --check-prefix=BOOL_RETURN +// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple amdgcn-amd-amdhsa -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast | FileCheck %s --check-prefix=AMD_BOOL_RETURN // RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -DUSE_ISNAN_WITH_INT_RETURN +// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc -DUSE_ISNAN_WITH_INT_RETURN // RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s --check-prefix=INT_RETURN +// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple amdgcn-amd-amdhsa -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s --check-prefix=AMD_INT_RETURN // RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math -ffp-contract=fast -DUSE_ISNAN_WITH_INT_RETURN +// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math -ffp-contract=fast -DUSE_ISNAN_WITH_INT_RETURN // RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s --check-prefix=INT_RETURN +// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple amdgcn-amd-amdhsa -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s --check-prefix=AMD_INT_RETURN // expected-no-diagnostics #include <cmath> @@ -13,10 +21,14 @@ double math(float f, double d) { double r = 0; // INT_RETURN: call i32 @__nv_isnanf(float + // AMD_INT_RETURN: call i32 @_{{.*}}isnanf(float // BOOL_RETURN: call i32 @__nv_isnanf(float + // AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnanf(float r += std::isnan(f); // INT_RETURN: call i32 @__nv_isnand(double + // AMD_INT_RETURN: call i32 @_{{.*}}isnand(double // BOOL_RETURN: call i32 @__nv_isnand(double + // AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnand(double r += std::isnan(d); return r; } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits