Author: jvesely
Date: Tue Feb  6 10:44:50 2018
New Revision: 324376

URL: http://llvm.org/viewvc/llvm-project?rev=324376&view=rev
Log:
Add vstore_half_rte implementation

Passes CTS on Carrizo

Reviewer: Jeroen Ketema <j.ket...@xs4all.nl>
Signed-off-by: Jan Vesely <jan.ves...@rutgers.edu>
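
For context, a minimal usage sketch (hypothetical, not part of the
patch), assuming the standard OpenCL vstore_halfn_rte signature
(data, element offset, pointer to half):

    __kernel void store_rte(__global const float4 *in, __global half *out)
    {
        size_t i = get_global_id(0);
        /* Round each of the four floats to the nearest half, breaking
         * ties to even, and store the results at out + i * 4. */
        vstore_half4_rte(in[i], i, out);
    }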

Modified:
    libclc/trunk/generic/include/clc/shared/vstore.h
    libclc/trunk/generic/lib/shared/vstore.cl

Modified: libclc/trunk/generic/include/clc/shared/vstore.h
URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/generic/include/clc/shared/vstore.h?rev=324376&r1=324375&r2=324376&view=diff
==============================================================================
--- libclc/trunk/generic/include/clc/shared/vstore.h (original)
+++ libclc/trunk/generic/include/clc/shared/vstore.h Tue Feb  6 10:44:50 2018
@@ -40,6 +40,7 @@ _CLC_VECTOR_VSTORE_HALF_PRIM1(float,)
 _CLC_VECTOR_VSTORE_HALF_PRIM1(float, _rtz)
 _CLC_VECTOR_VSTORE_HALF_PRIM1(float, _rtn)
 _CLC_VECTOR_VSTORE_HALF_PRIM1(float, _rtp)
+_CLC_VECTOR_VSTORE_HALF_PRIM1(float, _rte)
 
 #ifdef cl_khr_fp64
   _CLC_VECTOR_VSTORE_PRIM1(double)
@@ -47,6 +48,7 @@ _CLC_VECTOR_VSTORE_HALF_PRIM1(float, _rt
   _CLC_VECTOR_VSTORE_HALF_PRIM1(double, _rtz)
   _CLC_VECTOR_VSTORE_HALF_PRIM1(double, _rtn)
   _CLC_VECTOR_VSTORE_HALF_PRIM1(double, _rtp)
+  _CLC_VECTOR_VSTORE_HALF_PRIM1(double, _rte)
 #endif
 
 #ifdef cl_khr_fp16

Modified: libclc/trunk/generic/lib/shared/vstore.cl
URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/generic/lib/shared/vstore.cl?rev=324376&r1=324375&r2=324376&view=diff
==============================================================================
--- libclc/trunk/generic/lib/shared/vstore.cl (original)
+++ libclc/trunk/generic/lib/shared/vstore.cl Tue Feb  6 10:44:50 2018
@@ -147,6 +147,27 @@ _CLC_DEF _CLC_OVERLOAD float __clc_rtp(f
 {
        return ((as_uint(x) & 0x80000000) == 0) ? __clc_rti(x) : __clc_rtz(x);
 }
+_CLC_DEF _CLC_OVERLOAD float __clc_rte(float x)
+{
+       /* Mantissa + implicit bit */
+       const uint mantissa = (as_uint(x) & 0x7fffff) | (1u << 23);
+       const int exp = (as_uint(x) >> 23 & 0xff) - 127;
+       int shift = 13;
+       if (exp < -14) {
+               /* The default assumes the lower 13 bits are dropped, but
+                * denormal half results drop more. Shifting further once
+                * 'last' and the extra dropped bits are 0 is unnecessary. */
+               shift += min(-(exp + 14), 15);
+       }
+       int mask = (1 << shift) - 1;
+       const uint grs = mantissa & mask;
+       const uint last = mantissa & (1 << shift);
+       /* IEEE round-up rule: grs > 100b, or grs == 100b and last == 1
+        * (ties to even). exp > 15 should round to inf. */
+       bool roundup = (grs > (1 << (shift - 1))) ||
+               (grs == (1 << (shift - 1)) && last != 0) || (exp > 15);
+       return roundup ? __clc_rti(x) : __clc_rtz(x);
+}
 
 #ifdef cl_khr_fp64
 _CLC_DEF _CLC_OVERLOAD double __clc_noop(double x)
@@ -192,13 +213,35 @@ _CLC_DEF _CLC_OVERLOAD double __clc_rtp(
 {
        return ((as_ulong(x) & 0x8000000000000000UL) == 0) ? __clc_rti(x) : __clc_rtz(x);
 }
+_CLC_DEF _CLC_OVERLOAD double __clc_rte(double x)
+{
+       /* Mantissa + implicit bit */
+       const ulong mantissa = (as_ulong(x) & 0xfffffffffffff) | (1UL << 52);
+       const int exp = (as_ulong(x) >> 52 & 0x7ff) - 1023;
+       int shift = 42;
+       if (exp < -14) {
+               /* The default assumes the lower 42 bits are dropped, but
+                * denormal half results drop more. Shifting further once
+                * 'last' and the extra dropped bits are 0 is unnecessary. */
+               shift += min(-(exp + 14), 15);
+       }
+       ulong mask = (1UL << shift) - 1UL;
+       const ulong grs = mantissa & mask;
+       const ulong last = mantissa & (1UL << shift);
+       /* IEEE round-up rule: grs > 100b, or grs == 100b and last == 1
+        * (ties to even). exp > 15 should round to inf. */
+       bool roundup = (grs > (1UL << (shift - 1UL))) ||
+               (grs == (1UL << (shift - 1UL)) && last != 0) || (exp > 15);
+       return roundup ? __clc_rti(x) : __clc_rtz(x);
+}
 #endif
 
 #define __XFUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS) \
        __FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_noop) \
        __FUNC(SUFFIX ## _rtz, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rtz) \
        __FUNC(SUFFIX ## _rtn, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rtn) \
-       __FUNC(SUFFIX ## _rtp, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rtp)
+       __FUNC(SUFFIX ## _rtp, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rtp) \
+       __FUNC(SUFFIX ## _rte, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rte)
 
 #define FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS) \
        __XFUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS)
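
For readers unfamiliar with the rounding logic above, a small
hypothetical kernel illustrating the ties-to-even behaviour the
__clc_rte helpers implement; both inputs sit exactly halfway between
two representable half values:

    __kernel void rte_tie_check(__global half *out)
    {
        /* 1 + 2^-11 is halfway between the half values 1.0 and
         * 1 + 2^-10; the even neighbour wins, so 1.0 is stored. */
        vstore_half_rte(0x1.002p+0f, 0, out);
        /* 1 + 3*2^-11 is also a tie, but its lowest kept bit is odd,
         * so it rounds up to 1 + 2^-9 (1.001953125). */
        vstore_half_rte(0x1.006p+0f, 1, out);
    }

Reading the buffer back, out[0] should be 1.0 and out[1] should be
1.001953125 on any implementation that rounds ties to even.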

