https://github.com/wenju-he created
https://github.com/llvm/llvm-project/pull/199497
Replace element-wise scalarizing implementation with bitwise masking. For
example,
define hidden range(i32 -1, 1) <2 x i32> @_Z7signbitDv2_f(<2 x float> noundef %0) #0 {
%2 = bitcast <2 x float> %0 to <2 x i32>
%3 = extractelement <2 x i32> %2, i64 0
%4 = lshr i32 %3, 31
%5 = insertelement <2 x i32> poison, i32 %4, i64 0
%6 = extractelement <2 x i32> %2, i64 1
%7 = lshr i32 %6, 31
%8 = insertelement <2 x i32> %5, i32 %7, i64 1
%9 = icmp ne <2 x i32> %8, zeroinitializer
%10 = sext <2 x i1> %9 to <2 x i32>
ret <2 x i32> %10
}
is changed to:
define hidden noundef range(i32 -1, 1) <2 x i32> @_Z7signbitDv2_f(<2 x float> noundef %0) #0 {
%2 = bitcast <2 x float> %0 to <2 x i32>
%3 = ashr <2 x i32> %2, splat (i32 31)
ret <2 x i32> %3
}
From 89c0b81d7f096c36b6b3411610bbf636c677b492 Mon Sep 17 00:00:00 2001
From: Wenju He <[email protected]>
Date: Mon, 25 May 2026 10:00:31 +0200
Subject: [PATCH] [libclc] Optimize and vectorize signbit
Replace element-wise scalarizing implementation with bitwise masking.
For example,
define hidden range(i32 -1, 1) <2 x i32> @_Z7signbitDv2_f(<2 x float> noundef %0) #0 {
%2 = bitcast <2 x float> %0 to <2 x i32>
%3 = extractelement <2 x i32> %2, i64 0
%4 = lshr i32 %3, 31
%5 = insertelement <2 x i32> poison, i32 %4, i64 0
%6 = extractelement <2 x i32> %2, i64 1
%7 = lshr i32 %6, 31
%8 = insertelement <2 x i32> %5, i32 %7, i64 1
%9 = icmp ne <2 x i32> %8, zeroinitializer
%10 = sext <2 x i1> %9 to <2 x i32>
ret <2 x i32> %10
}
is changed to:
define hidden noundef range(i32 -1, 1) <2 x i32> @_Z7signbitDv2_f(<2 x float> noundef %0) #0 {
%2 = bitcast <2 x float> %0 to <2 x i32>
%3 = ashr <2 x i32> %2, splat (i32 31)
ret <2 x i32> %3
}
---
.../clc/include/clc/relational/clc_signbit.h | 2 +
.../clc/lib/generic/relational/clc_signbit.cl | 89 +------------------
.../lib/generic/relational/clc_signbit.inc | 38 ++++++++
3 files changed, 44 insertions(+), 85 deletions(-)
create mode 100644 libclc/clc/lib/generic/relational/clc_signbit.inc
diff --git a/libclc/clc/include/clc/relational/clc_signbit.h b/libclc/clc/include/clc/relational/clc_signbit.h
index 45677fba6cb89..1656ba1bcae76 100644
--- a/libclc/clc/include/clc/relational/clc_signbit.h
+++ b/libclc/clc/include/clc/relational/clc_signbit.h
@@ -9,6 +9,8 @@
#ifndef __CLC_RELATIONAL_CLC_SIGNBIT_H__
#define __CLC_RELATIONAL_CLC_SIGNBIT_H__
+#include "clc/internal/clc.h"
+
#define __CLC_FUNCTION __clc_signbit
#define __CLC_BODY "clc/relational/unary_decl.inc"
diff --git a/libclc/clc/lib/generic/relational/clc_signbit.cl b/libclc/clc/lib/generic/relational/clc_signbit.cl
index 05d2e8a0039ad..20e7a7a14297e 100644
--- a/libclc/clc/lib/generic/relational/clc_signbit.cl
+++ b/libclc/clc/lib/generic/relational/clc_signbit.cl
@@ -6,89 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#include "clc/internal/clc.h"
-#include "clc/relational/relational.h"
+#include "clc/math/math.h"
+#include "clc/relational/clc_signbit.h"
-#define _CLC_DEFINE_RELATIONAL_UNARY_VEC2(RET_TYPE, __CLC_FUNCTION, ARG_TYPE)
\
- _CLC_DEF _CLC_OVERLOAD RET_TYPE __CLC_FUNCTION(ARG_TYPE x) {
\
- return (RET_TYPE)((RET_TYPE){__CLC_FUNCTION(x.lo),
\
- __CLC_FUNCTION(x.hi)} != (RET_TYPE)0);
\
- }
-
-#define _CLC_DEFINE_RELATIONAL_UNARY_VEC3(RET_TYPE, __CLC_FUNCTION, ARG_TYPE)
\
- _CLC_DEF _CLC_OVERLOAD RET_TYPE __CLC_FUNCTION(ARG_TYPE x) {
\
- return (RET_TYPE)((RET_TYPE){__CLC_FUNCTION(x.s0), __CLC_FUNCTION(x.s1),
\
- __CLC_FUNCTION(x.s2)} != (RET_TYPE)0);
\
- }
-
-#define _CLC_DEFINE_RELATIONAL_UNARY_VEC4(RET_TYPE, __CLC_FUNCTION, ARG_TYPE)
\
- _CLC_DEF _CLC_OVERLOAD RET_TYPE __CLC_FUNCTION(ARG_TYPE x) {
\
- return (RET_TYPE)((RET_TYPE){__CLC_FUNCTION(x.s0), __CLC_FUNCTION(x.s1),
\
- __CLC_FUNCTION(x.s2),
\
- __CLC_FUNCTION(x.s3)} != (RET_TYPE)0);
\
- }
-
-#define _CLC_DEFINE_RELATIONAL_UNARY_VEC8(RET_TYPE, __CLC_FUNCTION, ARG_TYPE)
\
- _CLC_DEF _CLC_OVERLOAD RET_TYPE __CLC_FUNCTION(ARG_TYPE x) {
\
- return (RET_TYPE)((RET_TYPE){__CLC_FUNCTION(x.s0), __CLC_FUNCTION(x.s1),
\
- __CLC_FUNCTION(x.s2), __CLC_FUNCTION(x.s3),
\
- __CLC_FUNCTION(x.s4), __CLC_FUNCTION(x.s5),
\
- __CLC_FUNCTION(x.s6),
\
- __CLC_FUNCTION(x.s7)} != (RET_TYPE)0);
\
- }
-
-#define _CLC_DEFINE_RELATIONAL_UNARY_VEC16(RET_TYPE, __CLC_FUNCTION, ARG_TYPE)
\
- _CLC_DEF _CLC_OVERLOAD RET_TYPE __CLC_FUNCTION(ARG_TYPE x) {
\
- return (RET_TYPE)((RET_TYPE){__CLC_FUNCTION(x.s0), __CLC_FUNCTION(x.s1),
\
- __CLC_FUNCTION(x.s2), __CLC_FUNCTION(x.s3),
\
- __CLC_FUNCTION(x.s4), __CLC_FUNCTION(x.s5),
\
- __CLC_FUNCTION(x.s6), __CLC_FUNCTION(x.s7),
\
- __CLC_FUNCTION(x.s8), __CLC_FUNCTION(x.s9),
\
- __CLC_FUNCTION(x.sa), __CLC_FUNCTION(x.sb),
\
- __CLC_FUNCTION(x.sc), __CLC_FUNCTION(x.sd),
\
- __CLC_FUNCTION(x.se),
\
- __CLC_FUNCTION(x.sf)} != (RET_TYPE)0);
\
- }
-
-#define _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(RET_TYPE, __CLC_FUNCTION,
\
- ARG_TYPE)
\
- _CLC_DEFINE_RELATIONAL_UNARY_VEC2(RET_TYPE##2, __CLC_FUNCTION, ARG_TYPE##2)
\
- _CLC_DEFINE_RELATIONAL_UNARY_VEC3(RET_TYPE##3, __CLC_FUNCTION, ARG_TYPE##3)
\
- _CLC_DEFINE_RELATIONAL_UNARY_VEC4(RET_TYPE##4, __CLC_FUNCTION, ARG_TYPE##4)
\
- _CLC_DEFINE_RELATIONAL_UNARY_VEC8(RET_TYPE##8, __CLC_FUNCTION, ARG_TYPE##8)
\
- _CLC_DEFINE_RELATIONAL_UNARY_VEC16(RET_TYPE##16, __CLC_FUNCTION,
ARG_TYPE##16)
-
-_CLC_DEF _CLC_OVERLOAD int __clc_signbit(float x) {
- return __builtin_signbitf(x);
-}
-
-_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(int, __clc_signbit, float)
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-// The scalar version of __clc_signbit(double) returns an int, but the vector
-// versions return long.
-
-_CLC_DEF _CLC_OVERLOAD int __clc_signbit(double x) {
- return __builtin_signbit(x);
-}
-
-_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, __clc_signbit, double)
-
-#endif
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-// The scalar version of __clc_signbit(half) returns an int, but the vector
-// versions return short.
-
-_CLC_DEF _CLC_OVERLOAD int __clc_signbit(half x) {
- return __builtin_signbit(x);
-}
-
-_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, __clc_signbit, half)
-
-#endif
+#define __CLC_BODY "clc_signbit.inc"
+#include "clc/math/gentype.inc"
diff --git a/libclc/clc/lib/generic/relational/clc_signbit.inc b/libclc/clc/lib/generic/relational/clc_signbit.inc
new file mode 100644
index 0000000000000..f74bad93f513e
--- /dev/null
+++ b/libclc/clc/lib/generic/relational/clc_signbit.inc
@@ -0,0 +1,38 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if __CLC_VECSIZE_OR_1 == 1 // pick the return type: size-1 (scalar) case first
+#define __CLC_RETTYPE __CLC_INTN // size-1 overloads return __CLC_INTN
+#else
+#define __CLC_RETTYPE __CLC_S_GENTYPE // other sizes return __CLC_S_GENTYPE
+#endif
+
+#if __CLC_FPSIZE == 32 // pick the mask constant matching __CLC_FPSIZE
+#define __CLC_SIGNBIT_MASK SIGNBIT_SP32
+#elif __CLC_FPSIZE == 64
+#define __CLC_SIGNBIT_MASK SIGNBIT_DP64
+#elif __CLC_FPSIZE == 16
+#define __CLC_SIGNBIT_MASK SIGNBIT_FP16
+#else
+#error "Invalid FP size"
+#endif
+
+_CLC_OVERLOAD _CLC_DEF __CLC_RETTYPE __clc_signbit(__CLC_GENTYPE x) { // tests x against __CLC_SIGNBIT_MASK
+#if __CLC_VECSIZE_OR_1 == 1 // size-1 case: the `!= 0` result is converted to __CLC_INTN
+ return (__CLC_INTN)((__CLC_AS_S_GENTYPE(x) &
+ (__CLC_S_GENTYPE)__CLC_SIGNBIT_MASK) != 0);
+#else // other sizes: yield -1 where the masked value is non-zero, otherwise 0
+ return (__CLC_AS_S_GENTYPE(x) & (__CLC_S_GENTYPE)__CLC_SIGNBIT_MASK) !=
+ (__CLC_S_GENTYPE)0
+ ? (__CLC_S_GENTYPE)-1
+ : (__CLC_S_GENTYPE)0;
+#endif
+}
+
+#undef __CLC_RETTYPE // drop the two helper macros defined above
+#undef __CLC_SIGNBIT_MASK
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits