Re: [PATCH] D23627: [CUDA] Improve handling of math functions.

2016-08-18 Thread Justin Lebar via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL279140: [CUDA] Improve handling of math functions. (authored 
by jlebar).

Changed prior to commit:
  https://reviews.llvm.org/D23627?vs=68427&id=68600#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D23627

Files:
  cfe/trunk/lib/Headers/__clang_cuda_cmath.h
  cfe/trunk/lib/Headers/__clang_cuda_math_forward_declares.h

Index: cfe/trunk/lib/Headers/__clang_cuda_math_forward_declares.h
===
--- cfe/trunk/lib/Headers/__clang_cuda_math_forward_declares.h
+++ cfe/trunk/lib/Headers/__clang_cuda_math_forward_declares.h
@@ -140,6 +140,7 @@
 __DEVICE__ long lrint(float);
 __DEVICE__ long lround(double);
 __DEVICE__ long lround(float);
+__DEVICE__ long long llround(float); // No llround(double).
 __DEVICE__ double modf(double, double *);
 __DEVICE__ float modf(float, float *);
 __DEVICE__ double nan(const char *);
@@ -149,7 +150,8 @@
 __DEVICE__ double nextafter(double, double);
 __DEVICE__ float nextafter(float, float);
 __DEVICE__ double nexttoward(double, double);
-__DEVICE__ float nexttoward(float, float);
+__DEVICE__ float nexttoward(float, double);
+__DEVICE__ float nexttowardf(float, double);
 __DEVICE__ double pow(double, double);
 __DEVICE__ double pow(double, int);
 __DEVICE__ float pow(float, float);
@@ -235,6 +237,7 @@
 using ::logb;
 using ::lrint;
 using ::lround;
+using ::llround;
 using ::modf;
 using ::nan;
 using ::nanf;
Index: cfe/trunk/lib/Headers/__clang_cuda_cmath.h
===
--- cfe/trunk/lib/Headers/__clang_cuda_cmath.h
+++ cfe/trunk/lib/Headers/__clang_cuda_cmath.h
@@ -26,13 +26,15 @@
 #error "This file is for CUDA compilation only."
 #endif
 
+#include <limits>
+
 // CUDA lets us use various std math functions on the device side.  This file
 // works in concert with __clang_cuda_math_forward_declares.h to make this work.
 //
 // Specifically, the forward-declares header declares __device__ overloads for
 // these functions in the global namespace, then pulls them into namespace std
 // with 'using' statements.  Then this file implements those functions, after
-// the implementations have been pulled in.
+// their implementations have been pulled in.
 //
 // It's important that we declare the functions in the global namespace and pull
 // them into namespace std with using statements, as opposed to simply declaring
@@ -120,12 +122,15 @@
 __DEVICE__ float log(float __x) { return ::logf(__x); }
 __DEVICE__ float log10(float __x) { return ::log10f(__x); }
 __DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
-__DEVICE__ float nexttoward(float __from, float __to) {
+__DEVICE__ float nexttoward(float __from, double __to) {
   return __builtin_nexttowardf(__from, __to);
 }
 __DEVICE__ double nexttoward(double __from, double __to) {
   return __builtin_nexttoward(__from, __to);
 }
+__DEVICE__ float nexttowardf(float __from, double __to) {
+  return __builtin_nexttowardf(__from, __to);
+}
 __DEVICE__ float pow(float __base, float __exp) {
   return ::powf(__base, __exp);
 }
@@ -143,6 +148,280 @@
 __DEVICE__ float tan(float __x) { return ::tanf(__x); }
 __DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
 
+// Now we've defined everything we promised we'd define in
+// __clang_cuda_math_forward_declares.h.  We need to do two additional things to
+// fix up our math functions.
+//
+// 1) Define __device__ overloads for e.g. sin(int).  The CUDA headers define
+//only sin(float) and sin(double), which means that e.g. sin(0) is
+//ambiguous.
+//
+// 2) Pull the __device__ overloads of "foobarf" math functions into namespace
+//std.  These are defined in the CUDA headers in the global namespace,
+//independent of everything else we've done here.
+
+// We can't use std::enable_if, because we want to be pre-C++11 compatible.  But
+// we go ahead and unconditionally define functions that are only available when
+// compiling for C++11 to match the behavior of the CUDA headers.
+template <bool __B, class __T = void>
+struct __clang_cuda_enable_if {};
+
+template <class __T> struct __clang_cuda_enable_if<true, __T> {
+  typedef __T type;
+};
+
+// Defines an overload of __fn that accepts one integral argument, calls
+// __fn((double)x), and returns __retty.
+#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn)  \
+  template <typename __T>  \
+  __DEVICE__   \
+  typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,\
+  __retty>::type   \
+  __fn(__T __x) {  \
+return ::__fn((double)__x);\
+  }
+
+// Defines an overload of __fn that accepts two arithmetic arguments, calls

Re: [PATCH] D23627: [CUDA] Improve handling of math functions.

2016-08-18 Thread Justin Lebar via cfe-commits
jlebar added a comment.

These changes have always been kind of scary.  tra tested this against Thrust 
with all combinations of CUDA 7.0/7.5, c++98/11, and
libc++/libstdc++{4.8.5/4.9.3,5.3.0}.  So we should be good here.  I hope.


https://reviews.llvm.org/D23627



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D23627: [CUDA] Improve handling of math functions.

2016-08-17 Thread Artem Belevich via cfe-commits
tra accepted this revision.
tra added a comment.
This revision is now accepted and ready to land.

LGTM, but we may want someone familiar with math library to take a look.



Comment at: clang/lib/Headers/__clang_cuda_cmath.h:125-133
@@ -122,8 +124,11 @@
 __DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); 
}
-__DEVICE__ float nexttoward(float __from, float __to) {
+__DEVICE__ float nexttoward(float __from, double __to) {
   return __builtin_nexttowardf(__from, __to);
 }
 __DEVICE__ double nexttoward(double __from, double __to) {
   return __builtin_nexttoward(__from, __to);
 }
+__DEVICE__ float nexttowardf(float __from, double __to) {
+  return __builtin_nexttowardf(__from, __to);
+}
 __DEVICE__ float pow(float __base, float __exp) {

jlebar wrote:
> tra wrote:
> > You've got two identical `nexttoward(float, double)` now.
> > Perhaps first one was supposed to remain `nexttoward(float, float)` ?
> > 
> > 
> It's hard to see, but one is nexttowardf.
Indeed, I've missed that. 


https://reviews.llvm.org/D23627



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D23627: [CUDA] Improve handling of math functions.

2016-08-17 Thread Justin Lebar via cfe-commits
jlebar added inline comments.


Comment at: clang/lib/Headers/__clang_cuda_cmath.h:125-133
@@ -122,8 +124,11 @@
 __DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); 
}
-__DEVICE__ float nexttoward(float __from, float __to) {
+__DEVICE__ float nexttoward(float __from, double __to) {
   return __builtin_nexttowardf(__from, __to);
 }
 __DEVICE__ double nexttoward(double __from, double __to) {
   return __builtin_nexttoward(__from, __to);
 }
+__DEVICE__ float nexttowardf(float __from, double __to) {
+  return __builtin_nexttowardf(__from, __to);
+}
 __DEVICE__ float pow(float __base, float __exp) {

tra wrote:
> You've got two identical `nexttoward(float, double)` now.
> Perhaps first one was supposed to remain `nexttoward(float, float)` ?
> 
> 
It's hard to see, but one is nexttowardf.


Comment at: clang/lib/Headers/__clang_cuda_cmath.h:184-197
@@ +183,16 @@
+
+// Defines an overload of __fn that accepts one two arithmetic arguments, calls
+// __fn((double)x, (double)y), and returns a double.
+//
+// Note this is different from OVERLOAD_1, which generates an overload that
+// accepts only *integral* arguments.
+#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn)  
\
+  template <typename __T1, typename __T2>   
\
+  __DEVICE__ typename __clang_cuda_enable_if<  
\
+  std::numeric_limits<__T1>::is_specialized && 
\
+  std::numeric_limits<__T2>::is_specialized,   
\
+  __retty>::type   
\
+  __fn(__T1 __x, __T2 __y) {   
\
+return __fn((double)__x, (double)__y); 
\
+  }
+

tra wrote:
> `is_specialized` will be true for `long double` args and we'll instantiate 
> the function. Can we/should we produce an error instead?
I think it's OK.  Or at least, long double is kind of screwed up at the moment. 
 Sometimes we pick `__host__` overloads, sometimes we pick `__device__` 
overloads; I made no effort to make it correct.  I'm much more bullish on 
making use of long double a compile error as a way to solve these problems.


https://reviews.llvm.org/D23627



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D23627: [CUDA] Improve handling of math functions.

2016-08-17 Thread Artem Belevich via cfe-commits
tra added inline comments.


Comment at: clang/lib/Headers/__clang_cuda_cmath.h:125-133
@@ -122,8 +124,11 @@
 __DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); 
}
-__DEVICE__ float nexttoward(float __from, float __to) {
+__DEVICE__ float nexttoward(float __from, double __to) {
   return __builtin_nexttowardf(__from, __to);
 }
 __DEVICE__ double nexttoward(double __from, double __to) {
   return __builtin_nexttoward(__from, __to);
 }
+__DEVICE__ float nexttowardf(float __from, double __to) {
+  return __builtin_nexttowardf(__from, __to);
+}
 __DEVICE__ float pow(float __base, float __exp) {

You've got two identical `nexttoward(float, double)` now.
Perhaps first one was supposed to remain `nexttoward(float, float)` ?




Comment at: clang/lib/Headers/__clang_cuda_cmath.h:184-197
@@ +183,16 @@
+
+// Defines an overload of __fn that accepts one two arithmetic arguments, calls
+// __fn((double)x, (double)y), and returns a double.
+//
+// Note this is different from OVERLOAD_1, which generates an overload that
+// accepts only *integral* arguments.
+#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn)  
\
+  template <typename __T1, typename __T2>   
\
+  __DEVICE__ typename __clang_cuda_enable_if<  
\
+  std::numeric_limits<__T1>::is_specialized && 
\
+  std::numeric_limits<__T2>::is_specialized,   
\
+  __retty>::type   
\
+  __fn(__T1 __x, __T2 __y) {   
\
+return __fn((double)__x, (double)__y); 
\
+  }
+

`is_specialized` will be true for `long double` args and we'll instantiate the 
function. Can we/should we produce an error instead?


https://reviews.llvm.org/D23627



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits