https://github.com/Icohedron created https://github.com/llvm/llvm-project/pull/166419
Fixes #145752.

This PR inverts the result of `firstbithigh` by subtracting it from the integer bit width minus one (`bitwidth - 1`), so that it matches the result from DXC.

Although the original issue is labeled [DirectX], I assume the result of `firstbithigh` needs to be inverted for SPIR-V as well, because `firstbithigh` is an HLSL function and its behavior should not differ between DirectX and SPIR-V. This PR therefore changes the Clang codegen for both DirectX and SPIR-V. Do let me know if this is incorrect.

Unlike DXC, this Clang codegen of `firstbithigh` does not introduce a comparison with `-1` followed by a `select`: https://godbolt.org/z/xd6dvsT7M

```llvm
%FirstbitSHi = call i32 @dx.op.unaryBits.i32(i32 34, i32 %i)
%1 = sub i32 31, %FirstbitSHi
%2 = icmp eq i32 %FirstbitSHi, -1 ; not emitted by clang
%3 = select i1 %2, i32 -1, i32 %1 ; not emitted by clang
```

The comparison with `-1` and the `select` appear to be a no-op to me. (Reviewer note: in the IR above they are not a no-op. When `%FirstbitSHi` is `-1`, `%1` evaluates to `31 - (-1) = 32`, whereas `%3` evaluates to `-1`; the two sequences differ exactly in that case.) Let me know if there is a reason DXC does this and if I should add it to Clang's codegen for `firstbithigh`.
>From a8fc9962edf7cb032ef45fd189ea9f40d91f362d Mon Sep 17 00:00:00 2001 From: Deric Cheung <[email protected]> Date: Mon, 3 Nov 2025 19:00:15 -0800 Subject: [PATCH] Invert firstbithigh --- .../lib/Headers/hlsl/hlsl_alias_intrinsics.h | 72 ---------------- .../lib/Headers/hlsl/hlsl_intrinsic_helpers.h | 9 ++ clang/lib/Headers/hlsl/hlsl_intrinsics.h | 61 ++++++++++++++ .../CodeGenHLSL/builtins/firstbithigh.hlsl | 84 ++++++++++++------- 4 files changed, 124 insertions(+), 102 deletions(-) diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h index 4c5861c2c5f9d..c0914914a1262 100644 --- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h @@ -1073,78 +1073,6 @@ float3 f16tof32(uint3); _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32) float4 f16tof32(uint4); -//===----------------------------------------------------------------------===// -// firstbithigh builtins -//===----------------------------------------------------------------------===// - -/// \fn T firstbithigh(T Val) -/// \brief Returns the location of the first set bit starting from the highest -/// order bit and working downward, per component. -/// \param Val the input value. 
- -#ifdef __HLSL_ENABLE_16_BIT -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint firstbithigh(int16_t); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint2 firstbithigh(int16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint3 firstbithigh(int16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint4 firstbithigh(int16_t4); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint firstbithigh(uint16_t); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint2 firstbithigh(uint16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint3 firstbithigh(uint16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint4 firstbithigh(uint16_t4); -#endif - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint firstbithigh(int); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint2 firstbithigh(int2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint3 firstbithigh(int3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint4 firstbithigh(int4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint firstbithigh(uint); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint2 firstbithigh(uint2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint3 firstbithigh(uint3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint4 firstbithigh(uint4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint firstbithigh(int64_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint2 firstbithigh(int64_t2); 
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint3 firstbithigh(int64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint4 firstbithigh(int64_t4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint firstbithigh(uint64_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint2 firstbithigh(uint64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint3 firstbithigh(uint64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint4 firstbithigh(uint64_t4); - //===----------------------------------------------------------------------===// // firstbitlow builtins //===----------------------------------------------------------------------===// diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h index c877234479ad1..8560c75016b4f 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h @@ -148,6 +148,15 @@ template <typename T> constexpr T ldexp_impl(T X, T Exp) { return exp2(Exp) * X; } +template <typename T, int Bitwidth> constexpr uint firstbithigh_impl(T X) { + return (Bitwidth - 1) - __builtin_hlsl_elementwise_firstbithigh(X); +} + +template <typename T, int N, int Bitwidth> +constexpr vector<uint, N> firstbithigh_impl(vector<T, N> X) { + return (Bitwidth - 1) - __builtin_hlsl_elementwise_firstbithigh(X); +} + } // namespace __detail } // namespace hlsl diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index 5ba5bfb9abde0..192c3a2c974d9 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -261,6 +261,67 @@ faceforward(__detail::HLSL_FIXED_VECTOR<float, L> N, return __detail::faceforward_impl(N, I, Ng); } +//===----------------------------------------------------------------------===// +// firstbithigh builtins 
+//===----------------------------------------------------------------------===// + +/// \fn T firstbithigh(T Val) +/// \brief Returns the location of the first set bit starting from the highest +/// order bit and working downward, per component. +/// \param Val the input value. + +#ifdef __HLSL_ENABLE_16_BIT + +template <typename T> +_HLSL_AVAILABILITY(shadermodel, 6.2) +const inline __detail::enable_if_t<__detail::is_same<int16_t, T>::value || + __detail::is_same<uint16_t, T>::value, + uint> firstbithigh(T X) { + return __detail::firstbithigh_impl<T, 16>(X); +} + +template <typename T, int N> +_HLSL_AVAILABILITY(shadermodel, 6.2) +const + inline __detail::enable_if_t<__detail::is_same<int16_t, T>::value || + __detail::is_same<uint16_t, T>::value, + vector<uint, N>> firstbithigh(vector<T, N> X) { + return __detail::firstbithigh_impl<T, N, 16>(X); +} + +#endif + +template <typename T> +const inline __detail::enable_if_t< + __detail::is_same<int, T>::value || __detail::is_same<uint, T>::value, uint> +firstbithigh(T X) { + return __detail::firstbithigh_impl<T, 32>(X); +} + +template <typename T, int N> +const inline __detail::enable_if_t<__detail::is_same<int, T>::value || + __detail::is_same<uint, T>::value, + vector<uint, N>> +firstbithigh(vector<T, N> X) { + return __detail::firstbithigh_impl<T, N, 32>(X); +} + +template <typename T> +const inline __detail::enable_if_t<__detail::is_same<int64_t, T>::value || + __detail::is_same<uint64_t, T>::value, + uint> +firstbithigh(T X) { + return __detail::firstbithigh_impl<T, 64>(X); +} + +template <typename T, int N> +const inline __detail::enable_if_t<__detail::is_same<int64_t, T>::value || + __detail::is_same<uint64_t, T>::value, + vector<uint, N>> +firstbithigh(vector<T, N> X) { + return __detail::firstbithigh_impl<T, N, 64>(X); +} + //===----------------------------------------------------------------------===// // fmod builtins //===----------------------------------------------------------------------===// diff --git 
a/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl b/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl index 368d652a6f779..c8fa942fa81ff 100644 --- a/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl +++ b/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl @@ -1,160 +1,184 @@ // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ -// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type -fnative-int16-type \ -// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s -DTARGET=dx +// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ +// RUN: -fnative-int16-type -emit-llvm -o - | FileCheck %s -DTARGET=dx // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ -// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type -fnative-int16-type \ -// RUN: -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s -DTARGET=spv +// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \ +// RUN: -fnative-int16-type -emit-llvm -o - | FileCheck %s -DTARGET=spv #ifdef __HLSL_ENABLE_16_BIT // CHECK-LABEL: test_firstbithigh_ushort -// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i16 +// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbituhigh.i16 +// CHECK: sub i32 15, [[FBH]] uint test_firstbithigh_ushort(uint16_t p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_ushort2 -// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i16 +// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i16 +// CHECK: sub <2 x i32> splat (i32 15), [[FBH]] uint2 test_firstbithigh_ushort2(uint16_t2 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_ushort3 -// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i16 +// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i16 +// CHECK: sub <3 x i32> splat (i32 15), [[FBH]] uint3 test_firstbithigh_ushort3(uint16_t3 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_ushort4 -// CHECK: call <4 x i32> 
@llvm.[[TARGET]].firstbituhigh.v4i16 +// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i16 +// CHECK: sub <4 x i32> splat (i32 15), [[FBH]] uint4 test_firstbithigh_ushort4(uint16_t4 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_short -// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i16 +// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbitshigh.i16 +// CHECK: sub i32 15, [[FBH]] uint test_firstbithigh_short(int16_t p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_short2 -// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i16 +// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i16 +// CHECK: sub <2 x i32> splat (i32 15), [[FBH]] uint2 test_firstbithigh_short2(int16_t2 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_short3 -// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i16 +// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i16 +// CHECK: sub <3 x i32> splat (i32 15), [[FBH]] uint3 test_firstbithigh_short3(int16_t3 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_short4 -// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i16 +// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i16 +// CHECK: sub <4 x i32> splat (i32 15), [[FBH]] uint4 test_firstbithigh_short4(int16_t4 p0) { return firstbithigh(p0); } #endif // __HLSL_ENABLE_16_BIT // CHECK-LABEL: test_firstbithigh_uint -// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i32 +// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbituhigh.i32 +// CHECK: sub i32 31, [[FBH]] uint test_firstbithigh_uint(uint p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_uint2 -// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i32 +// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i32 +// CHECK: sub <2 x i32> splat (i32 31), [[FBH]] uint2 test_firstbithigh_uint2(uint2 p0) { return firstbithigh(p0); 
} // CHECK-LABEL: test_firstbithigh_uint3 -// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i32 +// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i32 +// CHECK: sub <3 x i32> splat (i32 31), [[FBH]] uint3 test_firstbithigh_uint3(uint3 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_uint4 -// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32 +// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32 +// CHECK: sub <4 x i32> splat (i32 31), [[FBH]] uint4 test_firstbithigh_uint4(uint4 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_ulong -// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i64 +// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbituhigh.i64 +// CHECK: sub i32 63, [[FBH]] uint test_firstbithigh_ulong(uint64_t p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_ulong2 -// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i64 +// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i64 +// CHECK: sub <2 x i32> splat (i32 63), [[FBH]] uint2 test_firstbithigh_ulong2(uint64_t2 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_ulong3 -// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i64 +// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i64 +// CHECK: sub <3 x i32> splat (i32 63), [[FBH]] uint3 test_firstbithigh_ulong3(uint64_t3 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_ulong4 -// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i64 +// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i64 +// CHECK: sub <4 x i32> splat (i32 63), [[FBH]] uint4 test_firstbithigh_ulong4(uint64_t4 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_int -// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i32 +// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbitshigh.i32 +// CHECK: sub i32 31, [[FBH]] uint test_firstbithigh_int(int 
p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_int2 -// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i32 +// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i32 +// CHECK: sub <2 x i32> splat (i32 31), [[FBH]] uint2 test_firstbithigh_int2(int2 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_int3 -// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i32 +// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i32 +// CHECK: sub <3 x i32> splat (i32 31), [[FBH]] uint3 test_firstbithigh_int3(int3 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_int4 -// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i32 +// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i32 +// CHECK: sub <4 x i32> splat (i32 31), [[FBH]] uint4 test_firstbithigh_int4(int4 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_long -// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i64 +// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbitshigh.i64 +// CHECK: sub i32 63, [[FBH]] uint test_firstbithigh_long(int64_t p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_long2 -// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i64 +// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i64 +// CHECK: sub <2 x i32> splat (i32 63), [[FBH]] uint2 test_firstbithigh_long2(int64_t2 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_long3 -// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i64 +// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i64 +// CHECK: sub <3 x i32> splat (i32 63), [[FBH]] uint3 test_firstbithigh_long3(int64_t3 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_long4 -// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i64 +// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i64 +// CHECK: sub <4 x i32> splat (i32 
63), [[FBH]] uint4 test_firstbithigh_long4(int64_t4 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_upcast // CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32(<4 x i32> %{{.*}}) -// CHECK: [[CONV:%.*]] = zext <4 x i32> [[FBH]] to <4 x i64> +// CHECK: [[SUB:%.*]] = sub <4 x i32> splat (i32 31), [[FBH]] +// CHECK: [[CONV:%.*]] = zext <4 x i32> [[SUB]] to <4 x i64> // CHECK: ret <4 x i64> [[CONV]] uint64_t4 test_firstbithigh_upcast(uint4 p0) { return firstbithigh(p0); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
