https://github.com/rjodinchr updated 
https://github.com/llvm/llvm-project/pull/98149

>From c03c6e3a746fe6bfaf164151278db3cd32111f19 Mon Sep 17 00:00:00 2001
From: Romaric Jodin <rjo...@chromium.org>
Date: Tue, 9 Jul 2024 14:22:26 +0200
Subject: [PATCH] libclc: increase fp16 support

Increase fp16 support to allow clspv to continue to be OpenCL
compliant following the update of the OpenCL-CTS adding more testing
on math functions and conversions with half.

Math functions are implemented by upscaling to fp32 and using the fp32
implementation. This guarantees the accuracy required by the CTS for
half-precision floating-point.
---
 libclc/clspv/lib/math/fma.cl              |  11 +++
 libclc/generic/include/clc/convert.h      |  23 ++++-
 libclc/generic/include/math/clc_ldexp.h   |   2 +-
 libclc/generic/lib/clcmacro.h             | 110 ++++++++++++++--------
 libclc/generic/lib/gen_convert.py         |  64 +++++++++++--
 libclc/generic/lib/math/acos.cl           |   2 +
 libclc/generic/lib/math/acosh.cl          |   2 +
 libclc/generic/lib/math/acospi.cl         |   2 +
 libclc/generic/lib/math/asinh.cl          |   2 +
 libclc/generic/lib/math/atan.cl           |  12 +++
 libclc/generic/lib/math/atan2.cl          |   2 +
 libclc/generic/lib/math/atan2pi.cl        |   2 +
 libclc/generic/lib/math/atanh.cl          |   2 +
 libclc/generic/lib/math/atanpi.cl         |   2 +
 libclc/generic/lib/math/cbrt.cl           |   2 +
 libclc/generic/lib/math/clc_ldexp.cl      |  12 +++
 libclc/generic/lib/math/clc_pown.cl       |  12 +++
 libclc/generic/lib/math/clc_remquo.cl     |  15 +++
 libclc/generic/lib/math/clc_rootn.cl      |  12 +++
 libclc/generic/lib/math/clc_sw_binary.inc |  16 +++-
 libclc/generic/lib/math/clc_sw_unary.inc  |  12 ++-
 libclc/generic/lib/math/cos.cl            |   2 +
 libclc/generic/lib/math/cosh.cl           |   2 +
 libclc/generic/lib/math/cospi.cl          |   2 +
 libclc/generic/lib/math/exp.cl            |   2 +
 libclc/generic/lib/math/expm1.cl          |   2 +
 libclc/generic/lib/math/fdim.inc          |  25 +++++
 libclc/generic/lib/math/frexp.inc         |  13 +++
 libclc/generic/lib/math/ilogb.cl          |  12 +++
 libclc/generic/lib/math/lgamma.cl         |   4 +-
 libclc/generic/lib/math/lgamma_r.cl       |  11 +++
 libclc/generic/lib/math/lgamma_r.inc      |   3 -
 libclc/generic/lib/math/log10.cl          |   8 ++
 libclc/generic/lib/math/log1p.cl          |   2 +
 libclc/generic/lib/math/log2.cl           |   8 ++
 libclc/generic/lib/math/log_base.h        |  19 ++++
 libclc/generic/lib/math/logb.cl           |   2 +
 libclc/generic/lib/math/pown.inc          |   3 -
 libclc/generic/lib/math/remquo.inc        |   3 -
 libclc/generic/lib/math/rootn.inc         |   3 -
 libclc/generic/lib/math/sin.cl            |   2 +
 libclc/generic/lib/math/sincos.inc        |   3 -
 libclc/generic/lib/math/sinh.cl           |   2 +
 libclc/generic/lib/math/sinpi.cl          |   2 +
 libclc/generic/lib/math/tanh.cl           |   2 +
 45 files changed, 389 insertions(+), 67 deletions(-)

diff --git a/libclc/clspv/lib/math/fma.cl b/libclc/clspv/lib/math/fma.cl
index 4f2806933eda9..3ffca28bd3bef 100644
--- a/libclc/clspv/lib/math/fma.cl
+++ b/libclc/clspv/lib/math/fma.cl
@@ -269,3 +269,14 @@ _CLC_DEF _CLC_OVERLOAD float fma(float a, float b, float 
c) {
                   ((uint)st_fma.mantissa.lo & 0x7fffff));
 }
 _CLC_TERNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, fma, float, float, float)
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_DEF _CLC_OVERLOAD half fma(half a, half b, half c) {
+  return (half)mad((float)a, (float)b, (float)c);
+}
+_CLC_TERNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, half, fma, half, half, half)
+
+#endif
diff --git a/libclc/generic/include/clc/convert.h 
b/libclc/generic/include/clc/convert.h
index f0ba796864d4d..db7bb0402491e 100644
--- a/libclc/generic/include/clc/convert.h
+++ b/libclc/generic/include/clc/convert.h
@@ -20,10 +20,19 @@
   _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, ulong, SUFFIX) \
   _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, float, SUFFIX)
 
-#ifdef cl_khr_fp64
+#if defined(cl_khr_fp64) && defined(cl_khr_fp16)
+#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX)                            
\
+  _CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX)                                 
\
+  _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, double, SUFFIX)                          
\
+  _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, half, SUFFIX)
+#elif defined(cl_khr_fp64)
 #define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \
   _CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \
   _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, double, SUFFIX)
+#elif defined(cl_khr_fp16)
+#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX)                            
\
+  _CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX)                                 
\
+  _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, half, SUFFIX)
 #else
 #define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \
   _CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX)
@@ -40,11 +49,20 @@
   _CLC_VECTOR_CONVERT_FROM(ulong, SUFFIX) \
   _CLC_VECTOR_CONVERT_FROM(float, SUFFIX)
 
-#ifdef cl_khr_fp64
+#if defined(cl_khr_fp64) && defined(cl_khr_fp16)
+#define _CLC_VECTOR_CONVERT_TO(SUFFIX)                                         
\
+  _CLC_VECTOR_CONVERT_TO1(SUFFIX)                                              
\
+  _CLC_VECTOR_CONVERT_FROM(double, SUFFIX)                                     
\
+  _CLC_VECTOR_CONVERT_FROM(half, SUFFIX)
+#elif defined(cl_khr_fp64)
 #define _CLC_VECTOR_CONVERT_TO(SUFFIX) \
   _CLC_VECTOR_CONVERT_TO1(SUFFIX) \
   _CLC_VECTOR_CONVERT_FROM(double, SUFFIX)
+#elif defined(cl_khr_fp16)
+#define _CLC_VECTOR_CONVERT_TO(SUFFIX)                                         
\
+  _CLC_VECTOR_CONVERT_TO1(SUFFIX)                                              
\
+  _CLC_VECTOR_CONVERT_FROM(half, SUFFIX)
 #else
 #define _CLC_VECTOR_CONVERT_TO(SUFFIX) \
   _CLC_VECTOR_CONVERT_TO1(SUFFIX)
 #endif
diff --git a/libclc/generic/include/math/clc_ldexp.h 
b/libclc/generic/include/math/clc_ldexp.h
index dbfc0447446fe..454b7ed3dcee5 100644
--- a/libclc/generic/include/math/clc_ldexp.h
+++ b/libclc/generic/include/math/clc_ldexp.h
@@ -7,5 +7,5 @@ _CLC_DEF _CLC_OVERLOAD double __clc_ldexp(double, int);
 
 #ifdef cl_khr_fp16
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
-_CLC_DEF _CLC_OVERLOAD float __clc_ldexp(half, int);
+_CLC_DEF _CLC_OVERLOAD half __clc_ldexp(half, int);
 #endif
diff --git a/libclc/generic/lib/clcmacro.h b/libclc/generic/lib/clcmacro.h
index f148dc37bae5c..9edef7dda944e 100644
--- a/libclc/generic/lib/clcmacro.h
+++ b/libclc/generic/lib/clcmacro.h
@@ -1,3 +1,5 @@
+#include <utils.h>
+
 #define _CLC_UNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE) \
   DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) { \
     return (RET_TYPE##2)(FUNCTION(x.x), FUNCTION(x.y)); \
@@ -106,44 +108,55 @@
 \
   DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##16 z) { \
     return (RET_TYPE##16)(FUNCTION(x, y, z.lo), FUNCTION(x, y, z.hi)); \
-  } \
-\
+  }
 
-#define _CLC_V_V_VP_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, 
ADDR_SPACE, ARG2_TYPE) \
-  DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ADDR_SPACE ARG2_TYPE##2 *y) { \
-    return (RET_TYPE##2)( \
-        FUNCTION(x.x, (ARG2_TYPE*)y), \
-        FUNCTION(x.y, (ADDR_SPACE ARG2_TYPE*)((ADDR_SPACE ARG2_TYPE*)y+1)) \
-    ); \
-  } \
-\
-  DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ADDR_SPACE ARG2_TYPE##3 *y) { \
-    return (RET_TYPE##3)( \
-        FUNCTION(x.x, (ARG2_TYPE*)y), \
-        FUNCTION(x.y, (ADDR_SPACE ARG2_TYPE*)((ADDR_SPACE ARG2_TYPE*)y+1)), \
-        FUNCTION(x.z, (ADDR_SPACE ARG2_TYPE*)((ADDR_SPACE ARG2_TYPE*)y+2)) \
-    ); \
-  } \
-\
-  DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ADDR_SPACE ARG2_TYPE##4 *y) { \
-    return (RET_TYPE##4)( \
-        FUNCTION(x.lo, (ARG2_TYPE##2*)y), \
-        FUNCTION(x.hi, (ADDR_SPACE ARG2_TYPE##2*)((ADDR_SPACE ARG2_TYPE*)y+2)) 
\
-    ); \
-  } \
-\
-  DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ADDR_SPACE ARG2_TYPE##8 *y) { \
-    return (RET_TYPE##8)( \
-        FUNCTION(x.lo, (ARG2_TYPE##4*)y), \
-        FUNCTION(x.hi, (ADDR_SPACE ARG2_TYPE##4*)((ADDR_SPACE ARG2_TYPE*)y+4)) 
\
-    ); \
-  } \
-\
-  DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ADDR_SPACE ARG2_TYPE##16 *y) 
{ \
-    return (RET_TYPE##16)( \
-        FUNCTION(x.lo, (ARG2_TYPE##8*)y), \
-        FUNCTION(x.hi, (ADDR_SPACE ARG2_TYPE##8*)((ADDR_SPACE ARG2_TYPE*)y+8)) 
\
-    ); \
+#define _CLC_V_V_VP_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE,         
\
+                              ADDR_SPACE, ARG2_TYPE)                           
\
+  DECLSPEC __CLC_XCONCAT(RET_TYPE, 2)                                          
\
+      FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 2) x,                                  
\
+               ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) * y) {                   
\
+    return (__CLC_XCONCAT(RET_TYPE, 2))(                                       
\
+        FUNCTION(x.x, (ADDR_SPACE ARG2_TYPE *)y),                              
\
+        FUNCTION(x.y,                                                          
\
+                 (ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 1)));    
\
+  }                                                                            
\
+                                                                               
\
+  DECLSPEC __CLC_XCONCAT(RET_TYPE, 3)                                          
\
+      FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 3) x,                                  
\
+               ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 3) * y) {                   
\
+    return (__CLC_XCONCAT(RET_TYPE, 3))(                                       
\
+        FUNCTION(x.x, (ADDR_SPACE ARG2_TYPE *)y),                              
\
+        FUNCTION(x.y,                                                          
\
+                 (ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 1)),     
\
+        FUNCTION(x.z,                                                          
\
+                 (ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 2)));    
\
+  }                                                                            
\
+                                                                               
\
+  DECLSPEC __CLC_XCONCAT(RET_TYPE, 4)                                          
\
+      FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 4) x,                                  
\
+               ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) * y) {                   
\
+    return (__CLC_XCONCAT(RET_TYPE, 4))(                                       
\
+        FUNCTION(x.lo, (ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) *)y),           
\
+        FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT(                              
\
+                           ARG2_TYPE, 2) *)((ADDR_SPACE ARG2_TYPE *)y + 2)));  
\
+  }                                                                            
\
+                                                                               
\
+  DECLSPEC __CLC_XCONCAT(RET_TYPE, 8)                                          
\
+      FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 8) x,                                  
\
+               ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) * y) {                   
\
+    return (__CLC_XCONCAT(RET_TYPE, 8))(                                       
\
+        FUNCTION(x.lo, (ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) *)y),           
\
+        FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT(                              
\
+                           ARG2_TYPE, 4) *)((ADDR_SPACE ARG2_TYPE *)y + 4)));  
\
+  }                                                                            
\
+                                                                               
\
+  DECLSPEC __CLC_XCONCAT(RET_TYPE, 16)                                         
\
+      FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 16) x,                                 
\
+               ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 16) * y) {                  
\
+    return (__CLC_XCONCAT(RET_TYPE, 16))(                                      
\
+        FUNCTION(x.lo, (ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) *)y),           
\
+        FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT(                              
\
+                           ARG2_TYPE, 8) *)((ADDR_SPACE ARG2_TYPE *)y + 8)));  
\
   }
 
 #define _CLC_DEFINE_BINARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, 
ARG2_TYPE) \
@@ -161,3 +174,26 @@ _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x) { \
   return BUILTIN(x); \
 } \
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, RET_TYPE, FUNCTION, ARG1_TYPE)
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+#define _CLC_DEFINE_UNARY_BUILTIN_FP16(FUNCTION)                               
\
+  _CLC_DEF _CLC_OVERLOAD half FUNCTION(half x) {                               
\
+    return (half)FUNCTION((float)x);                                           
\
+  }                                                                            
\
+  _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, FUNCTION, half)
+
+#define _CLC_DEFINE_BINARY_BUILTIN_FP16(FUNCTION)                              
\
+  _CLC_DEF _CLC_OVERLOAD half FUNCTION(half x, half y) {                       
\
+    return (half)FUNCTION((float)x, (float)y);                                 
\
+  }                                                                            
\
+  _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, FUNCTION, half, half)
+
+#else
+
+#define _CLC_DEFINE_UNARY_BUILTIN_FP16(FUNCTION)
+#define _CLC_DEFINE_BINARY_BUILTIN_FP16(FUNCTION)
+
+#endif
diff --git a/libclc/generic/lib/gen_convert.py 
b/libclc/generic/lib/gen_convert.py
index 21fc8ebc80d15..bd36faa4e9197 100644
--- a/libclc/generic/lib/gen_convert.py
+++ b/libclc/generic/lib/gen_convert.py
@@ -46,21 +46,21 @@
     "uint",
     "long",
     "ulong",
+    "half",
     "float",
     "double",
 ]
 int_types = ["char", "uchar", "short", "ushort", "int", "uint", "long", 
"ulong"]
 unsigned_types = ["uchar", "ushort", "uint", "ulong"]
-float_types = ["float", "double"]
+float_types = ["half", "float", "double"]
 int64_types = ["long", "ulong"]
 float64_types = ["double"]
+float16_types = ["half"]
 vector_sizes = ["", "2", "3", "4", "8", "16"]
 half_sizes = [("2", ""), ("4", "2"), ("8", "4"), ("16", "8")]
 
 saturation = ["", "_sat"]
 rounding_modes = ["_rtz", "_rte", "_rtp", "_rtn"]
-float_prefix = {"float": "FLT_", "double": "DBL_"}
-float_suffix = {"float": "f", "double": ""}
 
 bool_type = {
     "char": "char",
@@ -71,6 +71,7 @@
     "uint": "int",
     "long": "long",
     "ulong": "long",
+    "half": "short",
     "float": "int",
     "double": "long",
 }
@@ -95,6 +96,7 @@
     "uint": 4,
     "long": 8,
     "ulong": 8,
+    "half": 2,
     "float": 4,
     "double": 8,
 }
@@ -108,6 +110,7 @@
     "uint": "UINT_MAX",
     "long": "LONG_MAX",
     "ulong": "ULONG_MAX",
+    "half": "0x1.ffcp+15",
 }
 
 limit_min = {
@@ -119,24 +122,33 @@
     "uint": "0",
     "long": "LONG_MIN",
     "ulong": "0",
+    "half": "-0x1.ffcp+15",
 }
 
 
 def conditional_guard(src, dst):
     int64_count = 0
     float64_count = 0
+    float16_count = 0
     if src in int64_types:
         int64_count = int64_count + 1
     elif src in float64_types:
         float64_count = float64_count + 1
+    elif src in float16_types:
+        float16_count = float16_count + 1
     if dst in int64_types:
         int64_count = int64_count + 1
     elif dst in float64_types:
         float64_count = float64_count + 1
+    elif dst in float16_types:
+        float16_count = float16_count + 1
     if float64_count > 0:
         # In embedded profile, if cl_khr_fp64 is supported cles_khr_int64 has 
to be
         print("#ifdef cl_khr_fp64")
         return True
+    elif float16_count > 0:
+        print("#if defined cl_khr_fp16")
+        return True
     elif int64_count > 0:
         print("#if defined cles_khr_int64 || !defined(__EMBEDDED_PROFILE__)")
         return True
@@ -175,6 +187,10 @@ def conditional_guard(src, dst):
 
 #include <clc/clc.h>
 
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+#endif
+
 #ifdef cl_khr_fp64
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 
@@ -498,22 +514,42 @@ def generate_float_conversion(src, dst, size, mode, sat):
                         )
                     )
                 print(
-                    "  return select(r, nextafter(r, sign(r) * 
({DST}{N})-INFINITY), c);".format(
+                    "  {DST}{N} sel = select(r, nextafter(r, sign(r) * 
({DST}{N})-INFINITY), c);".format(
                         DST=dst, N=size, BOOL=bool_type[dst], SRC=src
                     )
                 )
             else:
                 print(
-                    "  return select(r, nextafter(r, sign(r) * 
({DST}{N})-INFINITY), convert_{BOOL}{N}(abs_y > abs_x));".format(
+                    "  {DST}{N} sel = select(r, nextafter(r, sign(r) * 
({DST}{N})-INFINITY), convert_{BOOL}{N}(abs_y > abs_x));".format(
                         DST=dst, N=size, BOOL=bool_type[dst]
                     )
                 )
+            if dst == "half" and src in int_types and sizeof_type[src] >= 2:
+                dst_max = limit_max[dst]
+                # short is 16 bits signed, so the maximum value rounded to 
zero is 0x1.ffcp+14 (0x1p+15 == 32768 > 0x7fff == 32767)
+                if src == "short":
+                    dst_max = "0x1.ffcp+14"
+                print(
+                    "  return clamp(sel, ({DST}{N}){DST_MIN}, 
({DST}{N}){DST_MAX});".format(
+                        DST=dst, N=size, DST_MIN=limit_min[dst], 
DST_MAX=dst_max
+                    )
+                )
+            else:
+                print("  return sel;")
         if mode == "_rtp":
             print(
-                "  return select(r, nextafter(r, ({DST}{N})INFINITY), 
convert_{BOOL}{N}(y < x));".format(
+                "  {DST}{N} sel = select(r, nextafter(r, ({DST}{N})INFINITY), 
convert_{BOOL}{N}(y < x));".format(
                     DST=dst, N=size, BOOL=bool_type[dst]
                 )
             )
+            if dst == "half" and src in int_types and sizeof_type[src] >= 2:
+                print(
+                    "  return max(sel, ({DST}{N}){DST_MIN});".format(
+                        DST=dst, N=size, DST_MIN=limit_min[dst]
+                    )
+                )
+            else:
+                print("  return sel;")
         if mode == "_rtn":
             if clspv:
                 print(
@@ -528,16 +564,28 @@ def generate_float_conversion(src, dst, size, mode, sat):
                         )
                     )
                 print(
-                    "  return select(r, nextafter(r, ({DST}{N})-INFINITY), 
c);".format(
+                    "  {DST}{N} sel = select(r, nextafter(r, 
({DST}{N})-INFINITY), c);".format(
                         DST=dst, N=size, BOOL=bool_type[dst], SRC=src
                     )
                 )
             else:
                 print(
-                    "  return select(r, nextafter(r, ({DST}{N})-INFINITY), 
convert_{BOOL}{N}(y > x));".format(
+                    "  {DST}{N} sel = select(r, nextafter(r, 
({DST}{N})-INFINITY), convert_{BOOL}{N}(y > x));".format(
                         DST=dst, N=size, BOOL=bool_type[dst]
                     )
                 )
+            if dst == "half" and src in int_types and sizeof_type[src] >= 2:
+                dst_max = limit_max[dst]
+                # short is 16 bits signed, so the maximum value rounded to 
negative infinity is 0x1.ffcp+14 (0x1p+15 == 32768 > 0x7fff == 32767)
+                if src == "short":
+                    dst_max = "0x1.ffcp+14"
+                print(
+                    "  return min(sel, ({DST}{N}){DST_MAX});".format(
+                        DST=dst, N=size, DST_MAX=dst_max
+                    )
+                )
+            else:
+                print("  return sel;")
 
     # Footer
     print("}")
diff --git a/libclc/generic/lib/math/acos.cl b/libclc/generic/lib/math/acos.cl
index 87db01416c86e..af59f443e8717 100644
--- a/libclc/generic/lib/math/acos.cl
+++ b/libclc/generic/lib/math/acos.cl
@@ -171,3 +171,5 @@ _CLC_OVERLOAD _CLC_DEF double acos(double x) {
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, acos, double);
 
 #endif // cl_khr_fp64
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(acos)
diff --git a/libclc/generic/lib/math/acosh.cl b/libclc/generic/lib/math/acosh.cl
index 59da511627744..6e8dd78c3c00c 100644
--- a/libclc/generic/lib/math/acosh.cl
+++ b/libclc/generic/lib/math/acosh.cl
@@ -125,3 +125,5 @@ _CLC_OVERLOAD _CLC_DEF double acosh(double x) {
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, acosh, double)
 
 #endif
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(acosh)
diff --git a/libclc/generic/lib/math/acospi.cl 
b/libclc/generic/lib/math/acospi.cl
index c91fc41789647..7ebf802b5fbc5 100644
--- a/libclc/generic/lib/math/acospi.cl
+++ b/libclc/generic/lib/math/acospi.cl
@@ -170,3 +170,5 @@ _CLC_OVERLOAD _CLC_DEF double acospi(double x) {
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, acospi, double)
 
 #endif
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(acospi)
diff --git a/libclc/generic/lib/math/asinh.cl b/libclc/generic/lib/math/asinh.cl
index cfddb31c68c35..9f8ddad873af0 100644
--- a/libclc/generic/lib/math/asinh.cl
+++ b/libclc/generic/lib/math/asinh.cl
@@ -291,3 +291,5 @@ _CLC_OVERLOAD _CLC_DEF double asinh(double x) {
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, asinh, double)
 
 #endif
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(asinh)
diff --git a/libclc/generic/lib/math/atan.cl b/libclc/generic/lib/math/atan.cl
index fa3633cef7480..adc26a8ae6f29 100644
--- a/libclc/generic/lib/math/atan.cl
+++ b/libclc/generic/lib/math/atan.cl
@@ -181,3 +181,15 @@ _CLC_OVERLOAD _CLC_DEF double atan(double x)
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atan, double);
 
 #endif // cl_khr_fp64
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_OVERLOAD _CLC_DEF half atan(half x) {
+    return (half)atan((float)x);
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, atan, half);
+
+#endif
diff --git a/libclc/generic/lib/math/atan2.cl b/libclc/generic/lib/math/atan2.cl
index a2f104fa185b6..d260338b83ec7 100644
--- a/libclc/generic/lib/math/atan2.cl
+++ b/libclc/generic/lib/math/atan2.cl
@@ -235,3 +235,5 @@ _CLC_OVERLOAD _CLC_DEF double atan2(double y, double x)
 _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atan2, double, double);
 
 #endif
+
+_CLC_DEFINE_BINARY_BUILTIN_FP16(atan2)
diff --git a/libclc/generic/lib/math/atan2pi.cl 
b/libclc/generic/lib/math/atan2pi.cl
index a15b14fd319d8..ad2eda3dec87c 100644
--- a/libclc/generic/lib/math/atan2pi.cl
+++ b/libclc/generic/lib/math/atan2pi.cl
@@ -219,3 +219,5 @@ _CLC_OVERLOAD _CLC_DEF double atan2pi(double y, double x) {
 _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atan2pi, double, double)
 
 #endif
+
+_CLC_DEFINE_BINARY_BUILTIN_FP16(atan2pi)
diff --git a/libclc/generic/lib/math/atanh.cl b/libclc/generic/lib/math/atanh.cl
index 4af2f458536a5..524af025b0b58 100644
--- a/libclc/generic/lib/math/atanh.cl
+++ b/libclc/generic/lib/math/atanh.cl
@@ -111,3 +111,5 @@ _CLC_OVERLOAD _CLC_DEF double atanh(double x) {
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atanh, double)
 
 #endif
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(atanh)
diff --git a/libclc/generic/lib/math/atanpi.cl 
b/libclc/generic/lib/math/atanpi.cl
index 2e2f032d8e6c2..625af12ba8518 100644
--- a/libclc/generic/lib/math/atanpi.cl
+++ b/libclc/generic/lib/math/atanpi.cl
@@ -180,3 +180,5 @@ _CLC_OVERLOAD _CLC_DEF double atanpi(double x) {
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atanpi, double)
 
 #endif
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(atanpi)
diff --git a/libclc/generic/lib/math/cbrt.cl b/libclc/generic/lib/math/cbrt.cl
index 5ff9367c89891..2f5ff9516ed76 100644
--- a/libclc/generic/lib/math/cbrt.cl
+++ b/libclc/generic/lib/math/cbrt.cl
@@ -149,3 +149,5 @@ _CLC_OVERLOAD _CLC_DEF double cbrt(double x) {
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cbrt, double)
 
 #endif
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(cbrt)
diff --git a/libclc/generic/lib/math/clc_ldexp.cl 
b/libclc/generic/lib/math/clc_ldexp.cl
index 61e34a521609c..ae6117b7b2922 100644
--- a/libclc/generic/lib/math/clc_ldexp.cl
+++ b/libclc/generic/lib/math/clc_ldexp.cl
@@ -126,3 +126,15 @@ _CLC_DEF _CLC_OVERLOAD double __clc_ldexp(double x, int n) 
{
 }
 
 #endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_OVERLOAD _CLC_DEF half __clc_ldexp(half x, int n) {
+    return (half)__clc_ldexp((float)x, n);
+}
+
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_ldexp, half, int);
+
+#endif
diff --git a/libclc/generic/lib/math/clc_pown.cl 
b/libclc/generic/lib/math/clc_pown.cl
index 0b7ac327512db..1a1e2acec6eda 100644
--- a/libclc/generic/lib/math/clc_pown.cl
+++ b/libclc/generic/lib/math/clc_pown.cl
@@ -368,3 +368,15 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pown(double x, int ny)
 }
 _CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_pown, double, int)
 #endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_OVERLOAD _CLC_DEF half __clc_pown(half x, int y) {
+    return (half)__clc_pown((float)x, y);
+}
+
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_pown, half, int);
+
+#endif
diff --git a/libclc/generic/lib/math/clc_remquo.cl 
b/libclc/generic/lib/math/clc_remquo.cl
index 3b9159ac967ef..edf4422610f4f 100644
--- a/libclc/generic/lib/math/clc_remquo.cl
+++ b/libclc/generic/lib/math/clc_remquo.cl
@@ -254,3 +254,18 @@ __VEC_REMQUO(double, 4, 2)
 __VEC_REMQUO(double, 8, 4)
 __VEC_REMQUO(double, 16, 8)
 #endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_OVERLOAD _CLC_DEF half __clc_remquo(half x, half y, __private int *pquo) {
+    return (half)__clc_remquo((float)x, (float)y, pquo);
+}
+__VEC_REMQUO(half, 2,)
+__VEC_REMQUO(half, 3, 2)
+__VEC_REMQUO(half, 4, 2)
+__VEC_REMQUO(half, 8, 4)
+__VEC_REMQUO(half, 16, 8)
+
+#endif
diff --git a/libclc/generic/lib/math/clc_rootn.cl 
b/libclc/generic/lib/math/clc_rootn.cl
index 0a2c98d3787cf..040b614f5feb4 100644
--- a/libclc/generic/lib/math/clc_rootn.cl
+++ b/libclc/generic/lib/math/clc_rootn.cl
@@ -368,3 +368,15 @@ _CLC_DEF _CLC_OVERLOAD double __clc_rootn(double x, int ny)
 }
 _CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_rootn, double, int)
 #endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_OVERLOAD _CLC_DEF half __clc_rootn(half x, int y) {
+    return (half)__clc_rootn((float)x, y);
+}
+
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_rootn, half, int);
+
+#endif
diff --git a/libclc/generic/lib/math/clc_sw_binary.inc 
b/libclc/generic/lib/math/clc_sw_binary.inc
index 7741475c23717..5cf15a21f7805 100644
--- a/libclc/generic/lib/math/clc_sw_binary.inc
+++ b/libclc/generic/lib/math/clc_sw_binary.inc
@@ -2,11 +2,25 @@
 
 #define __CLC_SW_FUNC(x) __CLC_CONCAT(__clc_, x)
 
-// TODO: Enable half precision when the sw routine is implemented
 #if __CLC_FPSIZE > 16
 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x, __CLC_GENTYPE 
y) {
   return __CLC_SW_FUNC(__CLC_FUNC)(x, y);
 }
+#elif __CLC_FPSIZE == 16
+#ifdef __CLC_SCALAR
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x,
+                                                __CLC_GENTYPE y) {
+  return convert_half(
+      __CLC_SW_FUNC(__CLC_FUNC)(convert_float(x), convert_float(y)));
+}
+#else
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x,
+                                                __CLC_GENTYPE y) {
+  return __CLC_XCONCAT(convert_half, __CLC_VECSIZE)(__CLC_SW_FUNC(__CLC_FUNC)(
+      __CLC_XCONCAT(convert_float, __CLC_VECSIZE)(x),
+      __CLC_XCONCAT(convert_float, __CLC_VECSIZE)(y)));
+}
+#endif
 #endif
 
 #undef __CLC_SW_FUNC
diff --git a/libclc/generic/lib/math/clc_sw_unary.inc 
b/libclc/generic/lib/math/clc_sw_unary.inc
index cd148b07a02c3..9b908aee87a18 100644
--- a/libclc/generic/lib/math/clc_sw_unary.inc
+++ b/libclc/generic/lib/math/clc_sw_unary.inc
@@ -2,11 +2,21 @@
 
 #define __CLC_SW_FUNC(x) __CLC_CONCAT(__clc_, x)
 
-// TODO: Enable half precision when the sw routine is implemented
 #if __CLC_FPSIZE > 16
 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x) {
   return __CLC_SW_FUNC(__CLC_FUNC)(x);
 }
+#elif __CLC_FPSIZE == 16
+#ifdef __CLC_SCALAR
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x) {
+  return convert_half(__CLC_SW_FUNC(__CLC_FUNC)(convert_float(x)));
+}
+#else
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x) {
+  return __CLC_XCONCAT(convert_half, __CLC_VECSIZE)(__CLC_SW_FUNC(__CLC_FUNC)(
+      __CLC_XCONCAT(convert_float, __CLC_VECSIZE)(x)));
+}
+#endif
 #endif
 
 #undef __CLC_SW_FUNC
diff --git a/libclc/generic/lib/math/cos.cl b/libclc/generic/lib/math/cos.cl
index 157447f9cd7ce..0af7aa67ba009 100644
--- a/libclc/generic/lib/math/cos.cl
+++ b/libclc/generic/lib/math/cos.cl
@@ -75,3 +75,5 @@ _CLC_OVERLOAD _CLC_DEF double cos(double x) {
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cos, double);
 
 #endif
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(cos)
diff --git a/libclc/generic/lib/math/cosh.cl b/libclc/generic/lib/math/cosh.cl
index 1a672755d1f7c..0987d1f921692 100644
--- a/libclc/generic/lib/math/cosh.cl
+++ b/libclc/generic/lib/math/cosh.cl
@@ -190,3 +190,5 @@ _CLC_OVERLOAD _CLC_DEF double cosh(double x) {
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cosh, double)
 
 #endif
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(cosh)
diff --git a/libclc/generic/lib/math/cospi.cl b/libclc/generic/lib/math/cospi.cl
index 108b637c9abb6..d6ab27ba021e2 100644
--- a/libclc/generic/lib/math/cospi.cl
+++ b/libclc/generic/lib/math/cospi.cl
@@ -134,3 +134,5 @@ _CLC_OVERLOAD _CLC_DEF double cospi(double x) {
 }
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cospi, double);
 #endif
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(cospi)
diff --git a/libclc/generic/lib/math/exp.cl b/libclc/generic/lib/math/exp.cl
index 37f693c39be2b..cc7b7f39bf1db 100644
--- a/libclc/generic/lib/math/exp.cl
+++ b/libclc/generic/lib/math/exp.cl
@@ -88,3 +88,5 @@ _CLC_OVERLOAD _CLC_DEF double exp(double x) {
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, exp, double)
 
 #endif
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(exp)
diff --git a/libclc/generic/lib/math/expm1.cl b/libclc/generic/lib/math/expm1.cl
index 9a3a90718a68d..9a0aa37ac333b 100644
--- a/libclc/generic/lib/math/expm1.cl
+++ b/libclc/generic/lib/math/expm1.cl
@@ -140,3 +140,5 @@ _CLC_OVERLOAD _CLC_DEF double expm1(double x) {
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, expm1, double)
 
 #endif
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(expm1)
diff --git a/libclc/generic/lib/math/fdim.inc b/libclc/generic/lib/math/fdim.inc
index 9aa3496b18902..98cbef6076667 100644
--- a/libclc/generic/lib/math/fdim.inc
+++ b/libclc/generic/lib/math/fdim.inc
@@ -69,3 +69,28 @@ __CLC_FDIM_VEC(16)
 #undef __CLC_FDIM_VEC
 #endif
 #endif
+
+#if __CLC_FPSIZE == 16
+#ifdef __CLC_SCALAR
+#define QNANBITPATT_FP16 ((short)0x7e00)
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fdim(__CLC_GENTYPE x,
+                                          private __CLC_GENTYPE y) {
+  short n = -(isnan(x) | isnan(y)) & QNANBITPATT_FP16;
+  short r = -(x > y) & as_short(x - y);
+  return as_half((short)(n | r));
+}
+#define __CLC_FDIM_VEC(width)                                                  
\
+  _CLC_OVERLOAD _CLC_DEF half##width fdim(half##width x, half##width y) {      
\
+    /* See comment in float implementation for explanation. */                 
\
+    short##width n = ~((x == x) & (y == y)) & QNANBITPATT_FP16;                
\
+    short##width r = (x > y) & as_short##width(x - y);                         
\
+    return as_half##width(n | r);                                              
\
+  }
+__CLC_FDIM_VEC(2)
+__CLC_FDIM_VEC(3)
+__CLC_FDIM_VEC(4)
+__CLC_FDIM_VEC(8)
+__CLC_FDIM_VEC(16)
+#undef __CLC_FDIM_VEC
+#endif
+#endif
diff --git a/libclc/generic/lib/math/frexp.inc 
b/libclc/generic/lib/math/frexp.inc
index b61cc3592a2a9..e6e2af49235a4 100644
--- a/libclc/generic/lib/math/frexp.inc
+++ b/libclc/generic/lib/math/frexp.inc
@@ -21,6 +21,8 @@
  * THE SOFTWARE.
  */
 
+#include "../clcmacro.h"
+
 #define __CLC_AS_GENTYPE __CLC_XCONCAT(as_, __CLC_GENTYPE)
 #define __CLC_AS_INTN __CLC_XCONCAT(as_, __CLC_INTN)
 
@@ -40,6 +42,17 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE frexp(__CLC_GENTYPE x, __CLC_ADDRESS_SPACE
 }
 #endif
 
+#if __CLC_FPSIZE == 16
+#ifdef __CLC_SCALAR
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE frexp(__CLC_GENTYPE x,
+                                           __CLC_ADDRESS_SPACE __CLC_INTN *ep) {
+  return (__CLC_GENTYPE)frexp((float)x, ep);
+}
+_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, __CLC_GENTYPE, frexp,
+                      __CLC_GENTYPE, __CLC_ADDRESS_SPACE, __CLC_INTN);
+#endif
+#endif
+
 #if __CLC_FPSIZE == 64
 #ifdef __CLC_SCALAR
 #define __CLC_AS_LONGN as_long
diff --git a/libclc/generic/lib/math/ilogb.cl b/libclc/generic/lib/math/ilogb.cl
index 050239c9c1ffa..d085e8608b01a 100644
--- a/libclc/generic/lib/math/ilogb.cl
+++ b/libclc/generic/lib/math/ilogb.cl
@@ -71,3 +71,15 @@ _CLC_OVERLOAD _CLC_DEF int ilogb(double x) {
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, ilogb, double);
 
 #endif // cl_khr_fp64
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_OVERLOAD _CLC_DEF int ilogb(half x) {
+    return ilogb((float)x);
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, ilogb, half);
+
+#endif
diff --git a/libclc/generic/lib/math/lgamma.cl b/libclc/generic/lib/math/lgamma.cl
index 26cd20eb01b86..024894f2f304f 100644
--- a/libclc/generic/lib/math/lgamma.cl
+++ b/libclc/generic/lib/math/lgamma.cl
@@ -41,4 +41,6 @@ _CLC_OVERLOAD _CLC_DEF double lgamma(double x) {
 
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, lgamma, double)
 
-#endif
\ No newline at end of file
+#endif
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(lgamma)
diff --git a/libclc/generic/lib/math/lgamma_r.cl b/libclc/generic/lib/math/lgamma_r.cl
index ff447386ac064..fe28e420d1286 100644
--- a/libclc/generic/lib/math/lgamma_r.cl
+++ b/libclc/generic/lib/math/lgamma_r.cl
@@ -486,6 +486,17 @@ _CLC_OVERLOAD _CLC_DEF double lgamma_r(double x, private int *ip) {
 _CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, lgamma_r, double, private, int)
 #endif
 
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_OVERLOAD _CLC_DEF half lgamma_r(half x, private int *iptr) {
+    return (half)lgamma_r((float)x, iptr);
+}
+
+_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, lgamma_r, half, private, int);
+
+#endif
 
 #define __CLC_ADDRSPACE global
 #define __CLC_BODY <lgamma_r.inc>
diff --git a/libclc/generic/lib/math/lgamma_r.inc b/libclc/generic/lib/math/lgamma_r.inc
index 0e19ba8fb2c7c..8aa17fbe79bd8 100644
--- a/libclc/generic/lib/math/lgamma_r.inc
+++ b/libclc/generic/lib/math/lgamma_r.inc
@@ -21,12 +21,9 @@
  * THE SOFTWARE.
  */
 
-// TODO: Enable half precision when the base version is implemented.
-#if __CLC_FPSIZE > 16
 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE lgamma_r(__CLC_GENTYPE x, __CLC_ADDRSPACE __CLC_INTN *iptr) {
     __CLC_INTN private_iptr;
     __CLC_GENTYPE ret = lgamma_r(x, &private_iptr);
     *iptr = private_iptr;
     return ret;
 }
-#endif
diff --git a/libclc/generic/lib/math/log10.cl b/libclc/generic/lib/math/log10.cl
index 35a53a1eb5f3d..e669f3148f9c0 100644
--- a/libclc/generic/lib/math/log10.cl
+++ b/libclc/generic/lib/math/log10.cl
@@ -28,6 +28,10 @@
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 #endif // cl_khr_fp64
 
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+#endif // cl_khr_fp16
+
 #define COMPILING_LOG10
 #include "log_base.h"
 #undef COMPILING_LOG10
@@ -37,3 +41,7 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, log10, float);
 #ifdef cl_khr_fp64
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, log10, double);
 #endif // cl_khr_fp64
+
+#ifdef cl_khr_fp16
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, log10, half);
+#endif // cl_khr_fp16
diff --git a/libclc/generic/lib/math/log1p.cl b/libclc/generic/lib/math/log1p.cl
index be25c64bf6a43..42fd9d3a23f34 100644
--- a/libclc/generic/lib/math/log1p.cl
+++ b/libclc/generic/lib/math/log1p.cl
@@ -175,3 +175,5 @@ _CLC_OVERLOAD _CLC_DEF double log1p(double x)
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, log1p, double);
 
 #endif // cl_khr_fp64
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(log1p)
diff --git a/libclc/generic/lib/math/log2.cl b/libclc/generic/lib/math/log2.cl
index 8776a80ec3be4..64463557e3f6d 100644
--- a/libclc/generic/lib/math/log2.cl
+++ b/libclc/generic/lib/math/log2.cl
@@ -28,6 +28,10 @@
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 #endif // cl_khr_fp64
 
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+#endif // cl_khr_fp16
+
 #define COMPILING_LOG2
 #include "log_base.h"
 #undef COMPILING_LOG2
@@ -37,3 +41,7 @@ _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, log2, float);
 #ifdef cl_khr_fp64
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, log2, double);
 #endif // cl_khr_fp64
+
+#ifdef cl_khr_fp16
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, log2, half);
+#endif // cl_khr_fp16
diff --git a/libclc/generic/lib/math/log_base.h b/libclc/generic/lib/math/log_base.h
index 4e20329f641bb..b8110ca1779a2 100644
--- a/libclc/generic/lib/math/log_base.h
+++ b/libclc/generic/lib/math/log_base.h
@@ -295,3 +295,22 @@ log(double x)
 }
 
 #endif // cl_khr_fp64
+
+#ifdef cl_khr_fp16
+
+_CLC_OVERLOAD _CLC_DEF half
+#if defined(COMPILING_LOG2)
+log2(half x) {
+  return (half)log2((float)x);
+}
+#elif defined(COMPILING_LOG10)
+log10(half x) {
+  return (half)log10((float)x);
+}
+#else
+log(half x) {
+  return (half)log((float)x);
+}
+#endif
+
+#endif // cl_khr_fp16
diff --git a/libclc/generic/lib/math/logb.cl b/libclc/generic/lib/math/logb.cl
index 31e5161653431..c0c2b5de40ebc 100644
--- a/libclc/generic/lib/math/logb.cl
+++ b/libclc/generic/lib/math/logb.cl
@@ -29,3 +29,5 @@ _CLC_OVERLOAD _CLC_DEF double logb(double x) {
 
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, logb, double)
 #endif
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(logb)
diff --git a/libclc/generic/lib/math/pown.inc b/libclc/generic/lib/math/pown.inc
index 2add2c7459de9..84729d90a796f 100644
--- a/libclc/generic/lib/math/pown.inc
+++ b/libclc/generic/lib/math/pown.inc
@@ -1,6 +1,3 @@
-// TODO: Enable half precision when the sw routine is implemented
-#if __CLC_FPSIZE > 16
 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE pown(__CLC_GENTYPE x, __CLC_INTN y) {
   return __clc_pown(x, y);
 }
-#endif
diff --git a/libclc/generic/lib/math/remquo.inc b/libclc/generic/lib/math/remquo.inc
index c33b5ddab3112..c1de78a5e7f9c 100644
--- a/libclc/generic/lib/math/remquo.inc
+++ b/libclc/generic/lib/math/remquo.inc
@@ -1,9 +1,6 @@
-// TODO: Enable half precision when the sw routine is implemented
-#if __CLC_FPSIZE > 16
 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE remquo(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_ADDRESS_SPACE __CLC_INTN *q) {
   __CLC_INTN local_q;
   __CLC_GENTYPE ret = __clc_remquo(x, y, &local_q);
   *q = local_q;
   return ret;
 }
-#endif
diff --git a/libclc/generic/lib/math/rootn.inc b/libclc/generic/lib/math/rootn.inc
index f788649685ac9..3f5b00c082cd3 100644
--- a/libclc/generic/lib/math/rootn.inc
+++ b/libclc/generic/lib/math/rootn.inc
@@ -1,6 +1,3 @@
-// TODO: Enable half precision when the sw routine is implemented
-#if __CLC_FPSIZE > 16
 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE rootn(__CLC_GENTYPE x, __CLC_INTN y) {
   return __clc_rootn(x, y);
 }
-#endif
diff --git a/libclc/generic/lib/math/sin.cl b/libclc/generic/lib/math/sin.cl
index 3a4074925b83e..689c3a1dbd0c8 100644
--- a/libclc/generic/lib/math/sin.cl
+++ b/libclc/generic/lib/math/sin.cl
@@ -77,3 +77,5 @@ _CLC_OVERLOAD _CLC_DEF double sin(double x) {
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sin, double);
 
 #endif
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(sin)
diff --git a/libclc/generic/lib/math/sincos.inc b/libclc/generic/lib/math/sincos.inc
index 2318ffb73f55b..e97f0f9641c1c 100644
--- a/libclc/generic/lib/math/sincos.inc
+++ b/libclc/generic/lib/math/sincos.inc
@@ -1,5 +1,3 @@
-// TODO: Enable half precision when sin/cos is implemented
-#if __CLC_FPSIZE > 16
 #define __CLC_DECLARE_SINCOS(ADDRSPACE, TYPE) \
   _CLC_OVERLOAD _CLC_DEF TYPE sincos (TYPE x, ADDRSPACE TYPE * cosval) { \
     *cosval = cos(x); \
@@ -11,4 +9,3 @@ __CLC_DECLARE_SINCOS(local, __CLC_GENTYPE)
 __CLC_DECLARE_SINCOS(private, __CLC_GENTYPE)
 
 #undef __CLC_DECLARE_SINCOS
-#endif
diff --git a/libclc/generic/lib/math/sinh.cl b/libclc/generic/lib/math/sinh.cl
index 9159b89222c28..a7a092f1f547d 100644
--- a/libclc/generic/lib/math/sinh.cl
+++ b/libclc/generic/lib/math/sinh.cl
@@ -189,3 +189,5 @@ _CLC_OVERLOAD _CLC_DEF double sinh(double x)
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sinh, double)
 
 #endif
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(sinh)
diff --git a/libclc/generic/lib/math/sinpi.cl b/libclc/generic/lib/math/sinpi.cl
index dbb995fe0cd9c..c8a1031df0216 100644
--- a/libclc/generic/lib/math/sinpi.cl
+++ b/libclc/generic/lib/math/sinpi.cl
@@ -129,3 +129,5 @@ _CLC_OVERLOAD _CLC_DEF double sinpi(double x)
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sinpi, double)
 
 #endif
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(sinpi)
diff --git a/libclc/generic/lib/math/tanh.cl b/libclc/generic/lib/math/tanh.cl
index e9c4079ff311e..6a6810cec1138 100644
--- a/libclc/generic/lib/math/tanh.cl
+++ b/libclc/generic/lib/math/tanh.cl
@@ -144,3 +144,5 @@ _CLC_OVERLOAD _CLC_DEF double tanh(double x)
 _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, tanh, double);
 
 #endif // cl_khr_fp64
+
+_CLC_DEFINE_UNARY_BUILTIN_FP16(tanh)

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to