[libclc] [libclc] Optimize and vectorize signbit (PR #199497)

Wenju He via cfe-commits Mon, 25 May 2026 12:55:33 -0700

https://github.com/wenju-he created 
https://github.com/llvm/llvm-project/pull/199497


Replace the element-wise (scalarizing) implementation of the vector signbit
overloads with bitwise masking of the sign bit. For example, the IR for the
float2 overload,
define hidden range(i32 -1, 1) <2 x i32> @_Z7signbitDv2_f(<2 x float> noundef %0) #0 {
  %2 = bitcast <2 x float> %0 to <2 x i32>
  %3 = extractelement <2 x i32> %2, i64 0
  %4 = lshr i32 %3, 31
  %5 = insertelement <2 x i32> poison, i32 %4, i64 0
  %6 = extractelement <2 x i32> %2, i64 1
  %7 = lshr i32 %6, 31
  %8 = insertelement <2 x i32> %5, i32 %7, i64 1
  %9 = icmp ne <2 x i32> %8, zeroinitializer
  %10 = sext <2 x i1> %9 to <2 x i32>
  ret <2 x i32> %10
}
is changed to:
define hidden noundef range(i32 -1, 1) <2 x i32> @_Z7signbitDv2_f(<2 x float> noundef %0) #0 {
  %2 = bitcast <2 x float> %0 to <2 x i32>
  %3 = ashr <2 x i32> %2, splat (i32 31)
  ret <2 x i32> %3
}

>From 89c0b81d7f096c36b6b3411610bbf636c677b492 Mon Sep 17 00:00:00 2001
From: Wenju He <[email protected]>
Date: Mon, 25 May 2026 10:00:31 +0200
Subject: [PATCH] [libclc] Optimize and vectorize signbit

Replace the element-wise (scalarizing) implementation of the vector signbit
overloads with bitwise masking of the sign bit. For example, the IR for the
float2 overload,
define hidden range(i32 -1, 1) <2 x i32> @_Z7signbitDv2_f(<2 x float> noundef %0) #0 {
  %2 = bitcast <2 x float> %0 to <2 x i32>
  %3 = extractelement <2 x i32> %2, i64 0
  %4 = lshr i32 %3, 31
  %5 = insertelement <2 x i32> poison, i32 %4, i64 0
  %6 = extractelement <2 x i32> %2, i64 1
  %7 = lshr i32 %6, 31
  %8 = insertelement <2 x i32> %5, i32 %7, i64 1
  %9 = icmp ne <2 x i32> %8, zeroinitializer
  %10 = sext <2 x i1> %9 to <2 x i32>
  ret <2 x i32> %10
}
is changed to:
define hidden noundef range(i32 -1, 1) <2 x i32> @_Z7signbitDv2_f(<2 x float> noundef %0) #0 {
  %2 = bitcast <2 x float> %0 to <2 x i32>
  %3 = ashr <2 x i32> %2, splat (i32 31)
  ret <2 x i32> %3
}
---
 .../clc/include/clc/relational/clc_signbit.h  |  2 +
 .../clc/lib/generic/relational/clc_signbit.cl | 89 +------------------
 .../lib/generic/relational/clc_signbit.inc    | 38 ++++++++
 3 files changed, 44 insertions(+), 85 deletions(-)
 create mode 100644 libclc/clc/lib/generic/relational/clc_signbit.inc

diff --git a/libclc/clc/include/clc/relational/clc_signbit.h 
b/libclc/clc/include/clc/relational/clc_signbit.h
index 45677fba6cb89..1656ba1bcae76 100644
--- a/libclc/clc/include/clc/relational/clc_signbit.h
+++ b/libclc/clc/include/clc/relational/clc_signbit.h
@@ -9,6 +9,8 @@
 #ifndef __CLC_RELATIONAL_CLC_SIGNBIT_H__
 #define __CLC_RELATIONAL_CLC_SIGNBIT_H__
 
+#include "clc/internal/clc.h"
+
 #define __CLC_FUNCTION __clc_signbit
 #define __CLC_BODY "clc/relational/unary_decl.inc"
 
diff --git a/libclc/clc/lib/generic/relational/clc_signbit.cl 
b/libclc/clc/lib/generic/relational/clc_signbit.cl
index 05d2e8a0039ad..20e7a7a14297e 100644
--- a/libclc/clc/lib/generic/relational/clc_signbit.cl
+++ b/libclc/clc/lib/generic/relational/clc_signbit.cl
@@ -6,89 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "clc/internal/clc.h"
-#include "clc/relational/relational.h"
+#include "clc/math/math.h"
+#include "clc/relational/clc_signbit.h"
 
-#define _CLC_DEFINE_RELATIONAL_UNARY_VEC2(RET_TYPE, __CLC_FUNCTION, ARG_TYPE)  
\
-  _CLC_DEF _CLC_OVERLOAD RET_TYPE __CLC_FUNCTION(ARG_TYPE x) {                 
\
-    return (RET_TYPE)((RET_TYPE){__CLC_FUNCTION(x.lo),                         
\
-                                 __CLC_FUNCTION(x.hi)} != (RET_TYPE)0);        
\
-  }
-
-#define _CLC_DEFINE_RELATIONAL_UNARY_VEC3(RET_TYPE, __CLC_FUNCTION, ARG_TYPE)  
\
-  _CLC_DEF _CLC_OVERLOAD RET_TYPE __CLC_FUNCTION(ARG_TYPE x) {                 
\
-    return (RET_TYPE)((RET_TYPE){__CLC_FUNCTION(x.s0), __CLC_FUNCTION(x.s1),   
\
-                                 __CLC_FUNCTION(x.s2)} != (RET_TYPE)0);        
\
-  }
-
-#define _CLC_DEFINE_RELATIONAL_UNARY_VEC4(RET_TYPE, __CLC_FUNCTION, ARG_TYPE)  
\
-  _CLC_DEF _CLC_OVERLOAD RET_TYPE __CLC_FUNCTION(ARG_TYPE x) {                 
\
-    return (RET_TYPE)((RET_TYPE){__CLC_FUNCTION(x.s0), __CLC_FUNCTION(x.s1),   
\
-                                 __CLC_FUNCTION(x.s2),                         
\
-                                 __CLC_FUNCTION(x.s3)} != (RET_TYPE)0);        
\
-  }
-
-#define _CLC_DEFINE_RELATIONAL_UNARY_VEC8(RET_TYPE, __CLC_FUNCTION, ARG_TYPE)  
\
-  _CLC_DEF _CLC_OVERLOAD RET_TYPE __CLC_FUNCTION(ARG_TYPE x) {                 
\
-    return (RET_TYPE)((RET_TYPE){__CLC_FUNCTION(x.s0), __CLC_FUNCTION(x.s1),   
\
-                                 __CLC_FUNCTION(x.s2), __CLC_FUNCTION(x.s3),   
\
-                                 __CLC_FUNCTION(x.s4), __CLC_FUNCTION(x.s5),   
\
-                                 __CLC_FUNCTION(x.s6),                         
\
-                                 __CLC_FUNCTION(x.s7)} != (RET_TYPE)0);        
\
-  }
-
-#define _CLC_DEFINE_RELATIONAL_UNARY_VEC16(RET_TYPE, __CLC_FUNCTION, ARG_TYPE) 
\
-  _CLC_DEF _CLC_OVERLOAD RET_TYPE __CLC_FUNCTION(ARG_TYPE x) {                 
\
-    return (RET_TYPE)((RET_TYPE){__CLC_FUNCTION(x.s0), __CLC_FUNCTION(x.s1),   
\
-                                 __CLC_FUNCTION(x.s2), __CLC_FUNCTION(x.s3),   
\
-                                 __CLC_FUNCTION(x.s4), __CLC_FUNCTION(x.s5),   
\
-                                 __CLC_FUNCTION(x.s6), __CLC_FUNCTION(x.s7),   
\
-                                 __CLC_FUNCTION(x.s8), __CLC_FUNCTION(x.s9),   
\
-                                 __CLC_FUNCTION(x.sa), __CLC_FUNCTION(x.sb),   
\
-                                 __CLC_FUNCTION(x.sc), __CLC_FUNCTION(x.sd),   
\
-                                 __CLC_FUNCTION(x.se),                         
\
-                                 __CLC_FUNCTION(x.sf)} != (RET_TYPE)0);        
\
-  }
-
-#define _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(RET_TYPE, __CLC_FUNCTION,         
\
-                                             ARG_TYPE)                         
\
-  _CLC_DEFINE_RELATIONAL_UNARY_VEC2(RET_TYPE##2, __CLC_FUNCTION, ARG_TYPE##2)  
\
-  _CLC_DEFINE_RELATIONAL_UNARY_VEC3(RET_TYPE##3, __CLC_FUNCTION, ARG_TYPE##3)  
\
-  _CLC_DEFINE_RELATIONAL_UNARY_VEC4(RET_TYPE##4, __CLC_FUNCTION, ARG_TYPE##4)  
\
-  _CLC_DEFINE_RELATIONAL_UNARY_VEC8(RET_TYPE##8, __CLC_FUNCTION, ARG_TYPE##8)  
\
-  _CLC_DEFINE_RELATIONAL_UNARY_VEC16(RET_TYPE##16, __CLC_FUNCTION, 
ARG_TYPE##16)
-
-_CLC_DEF _CLC_OVERLOAD int __clc_signbit(float x) {
-  return __builtin_signbitf(x);
-}
-
-_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(int, __clc_signbit, float)
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-// The scalar version of __clc_signbit(double) returns an int, but the vector
-// versions return long.
-
-_CLC_DEF _CLC_OVERLOAD int __clc_signbit(double x) {
-  return __builtin_signbit(x);
-}
-
-_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, __clc_signbit, double)
-
-#endif
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-// The scalar version of __clc_signbit(half) returns an int, but the vector
-// versions return short.
-
-_CLC_DEF _CLC_OVERLOAD int __clc_signbit(half x) {
-  return __builtin_signbit(x);
-}
-
-_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, __clc_signbit, half)
-
-#endif
+#define __CLC_BODY "clc_signbit.inc"
+#include "clc/math/gentype.inc"
diff --git a/libclc/clc/lib/generic/relational/clc_signbit.inc 
b/libclc/clc/lib/generic/relational/clc_signbit.inc
new file mode 100644
index 0000000000000..f74bad93f513e
--- /dev/null
+++ b/libclc/clc/lib/generic/relational/clc_signbit.inc
@@ -0,0 +1,38 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if __CLC_VECSIZE_OR_1 == 1
+#define __CLC_RETTYPE __CLC_INTN
+#else
+#define __CLC_RETTYPE __CLC_S_GENTYPE
+#endif
+
+#if __CLC_FPSIZE == 32
+#define __CLC_SIGNBIT_MASK SIGNBIT_SP32
+#elif __CLC_FPSIZE == 64
+#define __CLC_SIGNBIT_MASK SIGNBIT_DP64
+#elif __CLC_FPSIZE == 16
+#define __CLC_SIGNBIT_MASK SIGNBIT_FP16
+#else
+#error "Invalid FP size"
+#endif
+
+_CLC_OVERLOAD _CLC_DEF __CLC_RETTYPE __clc_signbit(__CLC_GENTYPE x) {
+#if __CLC_VECSIZE_OR_1 == 1
+  return (__CLC_INTN)((__CLC_AS_S_GENTYPE(x) &
+                       (__CLC_S_GENTYPE)__CLC_SIGNBIT_MASK) != 0);
+#else
+  return (__CLC_AS_S_GENTYPE(x) & (__CLC_S_GENTYPE)__CLC_SIGNBIT_MASK) !=
+                 (__CLC_S_GENTYPE)0
+             ? (__CLC_S_GENTYPE)-1
+             : (__CLC_S_GENTYPE)0;
+#endif
+}
+
+#undef __CLC_RETTYPE
+#undef __CLC_SIGNBIT_MASK

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[libclc] [libclc] Optimize and vectorize signbit (PR #199497)

Reply via email to