[PATCH] D80897: [OpenMP] Initial support for std::complex in target regions

Johannes Doerfert via Phabricator via cfe-commits Sun, 31 May 2020 13:18:59 -0700

jdoerfert created this revision.
jdoerfert added reviewers: tra, hfinkel, ABataev, JonChesterfield.
Herald added subscribers: sstefan1, guansong, bollu, yaxunl, mgorny.
Herald added a project: clang.


This simply follows the scheme we have for other wrappers. It resolves
the current link problem, e.g., `__muldc3 not found`, when std::complex
operations are used on a device.

In "CUDA mode" this should allow simple complex operations to work in
target regions. Normal mode doesn't work because the globalization in
the std::complex operators is somehow broken. This will most likely not
allow complex make math function calls to work properly, e.g., sin, but
that is more complex (pan intended) anyway.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D80897

Files:
  clang/lib/Headers/CMakeLists.txt
  clang/lib/Headers/__clang_cuda_complex_builtins.h
  clang/lib/Headers/openmp_wrappers/complex
  clang/test/Headers/Inputs/include/complex
  clang/test/Headers/Inputs/include/cstdlib
  clang/test/Headers/Inputs/include/math.h
  clang/test/Headers/nvptx_device_math_complex.cpp

Index: clang/test/Headers/nvptx_device_math_complex.cpp
===================================================================
--- /dev/null
+++ clang/test/Headers/nvptx_device_math_complex.cpp
@@ -0,0 +1,25 @@
+// REQUIRES: nvptx-registered-target
+// RUN: %clang_cc1 -verify -internal-isystem %S/Inputs/include -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -aux-triple powerpc64le-unknown-unknown -o - | FileCheck %s
+// expected-no-diagnostics
+
+#include <complex>
+
+// CHECK-DAG: define {{.*}} @__mulsc3
+// CHECK-DAG: define {{.*}} @__muldc3
+// CHECK-DAG: define {{.*}} @__divsc3
+// CHECK-DAG: define {{.*}} @__divdc3
+
+void test_scmplx(std::complex<float> a) {
+#pragma omp target
+  {
+    (void)(a * (a / a));
+  }
+}
+
+void test_dcmplx(std::complex<double> a) {
+#pragma omp target
+  {
+    (void)(a * (a / a));
+  }
+}
Index: clang/test/Headers/Inputs/include/math.h
===================================================================
--- clang/test/Headers/Inputs/include/math.h
+++ clang/test/Headers/Inputs/include/math.h
@@ -107,6 +107,10 @@
 long lroundf(float __a);
 int max(int __a, int __b);
 int min(int __a, int __b);
+float max(float __a, float __b);
+float min(float __a, float __b);
+double max(double __a, double __b);
+double min(double __a, double __b);
 double modf(double __a, double *__b);
 float modff(float __a, float *__b);
 double nearbyint(double __a);
Index: clang/test/Headers/Inputs/include/cstdlib
===================================================================
--- clang/test/Headers/Inputs/include/cstdlib
+++ clang/test/Headers/Inputs/include/cstdlib
@@ -24,4 +24,8 @@
 abs(long long __x) { return __builtin_llabs (__x); }
 
 float fabs(float __x) { return __builtin_fabs(__x); }
+
+float abs(float __x) { return fabs(__x); }
+double abs(double __x) { return fabs(__x); }
+
 }
Index: clang/test/Headers/Inputs/include/complex
===================================================================
--- /dev/null
+++ clang/test/Headers/Inputs/include/complex
@@ -0,0 +1,301 @@
+#pragma once
+
+#include <cmath>
+
+#define INFINITY (__builtin_inff())
+
+namespace std {
+
+// Taken from libc++
+template <class _Tp>
+class complex {
+public:
+  typedef _Tp value_type;
+
+private:
+  value_type __re_;
+  value_type __im_;
+
+public:
+  complex(const value_type &__re = value_type(), const value_type &__im = value_type())
+      : __re_(__re), __im_(__im) {}
+  template <class _Xp>
+  complex(const complex<_Xp> &__c)
+      : __re_(__c.real()), __im_(__c.imag()) {}
+
+  value_type real() const { return __re_; }
+  value_type imag() const { return __im_; }
+
+  void real(value_type __re) { __re_ = __re; }
+  void imag(value_type __im) { __im_ = __im; }
+
+  complex &operator=(const value_type &__re) {
+    __re_ = __re;
+    __im_ = value_type();
+    return *this;
+  }
+  complex &operator+=(const value_type &__re) {
+    __re_ += __re;
+    return *this;
+  }
+  complex &operator-=(const value_type &__re) {
+    __re_ -= __re;
+    return *this;
+  }
+  complex &operator*=(const value_type &__re) {
+    __re_ *= __re;
+    __im_ *= __re;
+    return *this;
+  }
+  complex &operator/=(const value_type &__re) {
+    __re_ /= __re;
+    __im_ /= __re;
+    return *this;
+  }
+
+  template <class _Xp>
+  complex &operator=(const complex<_Xp> &__c) {
+    __re_ = __c.real();
+    __im_ = __c.imag();
+    return *this;
+  }
+  template <class _Xp>
+  complex &operator+=(const complex<_Xp> &__c) {
+    __re_ += __c.real();
+    __im_ += __c.imag();
+    return *this;
+  }
+  template <class _Xp>
+  complex &operator-=(const complex<_Xp> &__c) {
+    __re_ -= __c.real();
+    __im_ -= __c.imag();
+    return *this;
+  }
+  template <class _Xp>
+  complex &operator*=(const complex<_Xp> &__c) {
+    *this = *this * complex(__c.real(), __c.imag());
+    return *this;
+  }
+  template <class _Xp>
+  complex &operator/=(const complex<_Xp> &__c) {
+    *this = *this / complex(__c.real(), __c.imag());
+    return *this;
+  }
+};
+
+template <class _Tp>
+inline complex<_Tp>
+operator+(const complex<_Tp> &__x, const complex<_Tp> &__y) {
+  complex<_Tp> __t(__x);
+  __t += __y;
+  return __t;
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator+(const complex<_Tp> &__x, const _Tp &__y) {
+  complex<_Tp> __t(__x);
+  __t += __y;
+  return __t;
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator+(const _Tp &__x, const complex<_Tp> &__y) {
+  complex<_Tp> __t(__y);
+  __t += __x;
+  return __t;
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator-(const complex<_Tp> &__x, const complex<_Tp> &__y) {
+  complex<_Tp> __t(__x);
+  __t -= __y;
+  return __t;
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator-(const complex<_Tp> &__x, const _Tp &__y) {
+  complex<_Tp> __t(__x);
+  __t -= __y;
+  return __t;
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator-(const _Tp &__x, const complex<_Tp> &__y) {
+  complex<_Tp> __t(-__y);
+  __t += __x;
+  return __t;
+}
+
+template <class _Tp>
+complex<_Tp>
+operator*(const complex<_Tp> &__z, const complex<_Tp> &__w) {
+  _Tp __a = __z.real();
+  _Tp __b = __z.imag();
+  _Tp __c = __w.real();
+  _Tp __d = __w.imag();
+  _Tp __ac = __a * __c;
+  _Tp __bd = __b * __d;
+  _Tp __ad = __a * __d;
+  _Tp __bc = __b * __c;
+  _Tp __x = __ac - __bd;
+  _Tp __y = __ad + __bc;
+  if (std::isnan(__x) && std::isnan(__y)) {
+    bool __recalc = false;
+    if (std::isinf(__a) || std::isinf(__b)) {
+      __a = copysign(std::isinf(__a) ? _Tp(1) : _Tp(0), __a);
+      __b = copysign(std::isinf(__b) ? _Tp(1) : _Tp(0), __b);
+      if (std::isnan(__c))
+        __c = copysign(_Tp(0), __c);
+      if (std::isnan(__d))
+        __d = copysign(_Tp(0), __d);
+      __recalc = true;
+    }
+    if (std::isinf(__c) || std::isinf(__d)) {
+      __c = copysign(std::isinf(__c) ? _Tp(1) : _Tp(0), __c);
+      __d = copysign(std::isinf(__d) ? _Tp(1) : _Tp(0), __d);
+      if (std::isnan(__a))
+        __a = copysign(_Tp(0), __a);
+      if (std::isnan(__b))
+        __b = copysign(_Tp(0), __b);
+      __recalc = true;
+    }
+    if (!__recalc && (std::isinf(__ac) || std::isinf(__bd) ||
+                      std::isinf(__ad) || std::isinf(__bc))) {
+      if (std::isnan(__a))
+        __a = copysign(_Tp(0), __a);
+      if (std::isnan(__b))
+        __b = copysign(_Tp(0), __b);
+      if (std::isnan(__c))
+        __c = copysign(_Tp(0), __c);
+      if (std::isnan(__d))
+        __d = copysign(_Tp(0), __d);
+      __recalc = true;
+    }
+    if (__recalc) {
+      __x = _Tp(INFINITY) * (__a * __c - __b * __d);
+      __y = _Tp(INFINITY) * (__a * __d + __b * __c);
+    }
+  }
+  return complex<_Tp>(__x, __y);
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator*(const complex<_Tp> &__x, const _Tp &__y) {
+  complex<_Tp> __t(__x);
+  __t *= __y;
+  return __t;
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator*(const _Tp &__x, const complex<_Tp> &__y) {
+  complex<_Tp> __t(__y);
+  __t *= __x;
+  return __t;
+}
+
+template <class _Tp>
+complex<_Tp>
+operator/(const complex<_Tp> &__z, const complex<_Tp> &__w) {
+  int __ilogbw = 0;
+  _Tp __a = __z.real();
+  _Tp __b = __z.imag();
+  _Tp __c = __w.real();
+  _Tp __d = __w.imag();
+  _Tp __logbw = logb(fmax(fabs(__c), fabs(__d)));
+  if (std::isfinite(__logbw)) {
+    __ilogbw = static_cast<int>(__logbw);
+    __c = scalbn(__c, -__ilogbw);
+    __d = scalbn(__d, -__ilogbw);
+  }
+  _Tp __denom = __c * __c + __d * __d;
+  _Tp __x = scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
+  _Tp __y = scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
+  if (std::isnan(__x) && std::isnan(__y)) {
+    if ((__denom == _Tp(0)) && (!std::isnan(__a) || !std::isnan(__b))) {
+      __x = copysign(_Tp(INFINITY), __c) * __a;
+      __y = copysign(_Tp(INFINITY), __c) * __b;
+    } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) && std::isfinite(__d)) {
+      __a = copysign(std::isinf(__a) ? _Tp(1) : _Tp(0), __a);
+      __b = copysign(std::isinf(__b) ? _Tp(1) : _Tp(0), __b);
+      __x = _Tp(INFINITY) * (__a * __c + __b * __d);
+      __y = _Tp(INFINITY) * (__b * __c - __a * __d);
+    } else if (std::isinf(__logbw) && __logbw > _Tp(0) && std::isfinite(__a) && std::isfinite(__b)) {
+      __c = copysign(std::isinf(__c) ? _Tp(1) : _Tp(0), __c);
+      __d = copysign(std::isinf(__d) ? _Tp(1) : _Tp(0), __d);
+      __x = _Tp(0) * (__a * __c + __b * __d);
+      __y = _Tp(0) * (__b * __c - __a * __d);
+    }
+  }
+  return complex<_Tp>(__x, __y);
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator/(const complex<_Tp> &__x, const _Tp &__y) {
+  return complex<_Tp>(__x.real() / __y, __x.imag() / __y);
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator/(const _Tp &__x, const complex<_Tp> &__y) {
+  complex<_Tp> __t(__x);
+  __t /= __y;
+  return __t;
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator+(const complex<_Tp> &__x) {
+  return __x;
+}
+
+template <class _Tp>
+inline complex<_Tp>
+operator-(const complex<_Tp> &__x) {
+  return complex<_Tp>(-__x.real(), -__x.imag());
+}
+
+template <class _Tp>
+inline bool
+operator==(const complex<_Tp> &__x, const complex<_Tp> &__y) {
+  return __x.real() == __y.real() && __x.imag() == __y.imag();
+}
+
+template <class _Tp>
+inline bool
+operator==(const complex<_Tp> &__x, const _Tp &__y) {
+  return __x.real() == __y && __x.imag() == 0;
+}
+
+template <class _Tp>
+inline bool
+operator==(const _Tp &__x, const complex<_Tp> &__y) {
+  return __x == __y.real() && 0 == __y.imag();
+}
+
+template <class _Tp>
+inline bool
+operator!=(const complex<_Tp> &__x, const complex<_Tp> &__y) {
+  return !(__x == __y);
+}
+
+template <class _Tp>
+inline bool
+operator!=(const complex<_Tp> &__x, const _Tp &__y) {
+  return !(__x == __y);
+}
+
+template <class _Tp>
+inline bool
+operator!=(const _Tp &__x, const complex<_Tp> &__y) {
+  return !(__x == __y);
+}
+
+} // namespace std
Index: clang/lib/Headers/openmp_wrappers/complex
===================================================================
--- /dev/null
+++ clang/lib/Headers/openmp_wrappers/complex
@@ -0,0 +1,30 @@
+/*===-- complex --- OpenMP complex wrapper for target regions --------- c++ -===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __CLANG_OPENMP_COMPLEX_H__
+#define __CLANG_OPENMP_COMPLEX_H__
+
+#ifndef _OPENMP
+#error "This file is for OpenMP compilation only."
+#endif
+
+// We require std::math functions in the complex builtins below.
+#include <cmath>
+
+#pragma omp begin declare variant match(                                       \
+    device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
+
+#define __CUDA__
+#include <__clang_cuda_complex_builtins.h>
+#endif
+
+#pragma omp end declare variant
+
+// Grab the host header too.
+#include_next <complex>
Index: clang/lib/Headers/__clang_cuda_complex_builtins.h
===================================================================
--- clang/lib/Headers/__clang_cuda_complex_builtins.h
+++ clang/lib/Headers/__clang_cuda_complex_builtins.h
@@ -15,8 +15,20 @@
 // operations.  (These implementations come from libc++, and have been modified
 // to work with CUDA.)
 
-extern "C" inline __device__ double _Complex __muldc3(double __a, double __b,
-                                                      double __c, double __d) {
+#pragma push_macro("__DEVICE__")
+#ifdef _OPENMP
+#pragma omp declare target
+#define __DEVICE__ __attribute__((nothrow))
+#else
+#define __DEVICE__ __device__ inline
+#endif
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+__DEVICE__ double _Complex __muldc3(double __a, double __b, double __c,
+                                    double __d) {
   double __ac = __a * __c;
   double __bd = __b * __d;
   double __ad = __a * __d;
@@ -59,15 +71,14 @@
     if (__recalc) {
       // Can't use std::numeric_limits<double>::infinity() -- that doesn't have
       // a device overload (and isn't constexpr before C++11, naturally).
-      __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d);
-      __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c);
+      __real__(z) = __builtin_huge_val() * (__a * __c - __b * __d);
+      __imag__(z) = __builtin_huge_val() * (__a * __d + __b * __c);
     }
   }
   return z;
 }
 
-extern "C" inline __device__ float _Complex __mulsc3(float __a, float __b,
-                                                     float __c, float __d) {
+__DEVICE__ float _Complex __mulsc3(float __a, float __b, float __c, float __d) {
   float __ac = __a * __c;
   float __bd = __b * __d;
   float __ad = __a * __d;
@@ -115,8 +126,8 @@
   return z;
 }
 
-extern "C" inline __device__ double _Complex __divdc3(double __a, double __b,
-                                                      double __c, double __d) {
+__DEVICE__ double _Complex __divdc3(double __a, double __b, double __c,
+                                    double __d) {
   int __ilogbw = 0;
   // Can't use std::max, because that's defined in <algorithm>, and we don't
   // want to pull that in for every compile.  The CUDA headers define
@@ -133,14 +144,14 @@
   __imag__(z) = std::scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
   if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {
     if ((__denom == 0.0) && (!std::isnan(__a) || !std::isnan(__b))) {
-      __real__(z) = std::copysign(__builtin_huge_valf(), __c) * __a;
-      __imag__(z) = std::copysign(__builtin_huge_valf(), __c) * __b;
+      __real__(z) = std::copysign(__builtin_huge_val(), __c) * __a;
+      __imag__(z) = std::copysign(__builtin_huge_val(), __c) * __b;
     } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) &&
                std::isfinite(__d)) {
       __a = std::copysign(std::isinf(__a) ? 1.0 : 0.0, __a);
       __b = std::copysign(std::isinf(__b) ? 1.0 : 0.0, __b);
-      __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d);
-      __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d);
+      __real__(z) = __builtin_huge_val() * (__a * __c + __b * __d);
+      __imag__(z) = __builtin_huge_val() * (__b * __c - __a * __d);
     } else if (std::isinf(__logbw) && __logbw > 0.0 && std::isfinite(__a) &&
                std::isfinite(__b)) {
       __c = std::copysign(std::isinf(__c) ? 1.0 : 0.0, __c);
@@ -152,8 +163,7 @@
   return z;
 }
 
-extern "C" inline __device__ float _Complex __divsc3(float __a, float __b,
-                                                     float __c, float __d) {
+__DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) {
   int __ilogbw = 0;
   float __logbw = std::logb(max(std::abs(__c), std::abs(__d)));
   if (std::isfinite(__logbw)) {
@@ -186,4 +196,14 @@
   return z;
 }
 
+#if defined(__cplusplus)
+} // extern "C"
+#endif
+
+#ifdef _OPENMP
+#pragma omp end declare target
+#endif
+
+#pragma pop_macro("__DEVICE__")
+
 #endif // __CLANG_CUDA_COMPLEX_BUILTINS
Index: clang/lib/Headers/CMakeLists.txt
===================================================================
--- clang/lib/Headers/CMakeLists.txt
+++ clang/lib/Headers/CMakeLists.txt
@@ -147,6 +147,7 @@
 set(openmp_wrapper_files
   openmp_wrappers/math.h
   openmp_wrappers/cmath
+  openmp_wrappers/complex
   openmp_wrappers/__clang_openmp_device_functions.h
   openmp_wrappers/new
 )

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D80897: [OpenMP] Initial support for std::complex in target regions

Reply via email to