https://github.com/farzonl updated https://github.com/llvm/llvm-project/pull/85424
>From 681f4bbbc4aba08e285864ded62a7f01e178bf38 Mon Sep 17 00:00:00 2001 From: Farzon Lotfi <farzonlo...@microsoft.com> Date: Thu, 14 Mar 2024 15:26:26 -0400 Subject: [PATCH 1/3] [HLSL] Implement the `clamp` intrinsic --- clang/include/clang/Basic/Builtins.td | 6 + clang/lib/CodeGen/CGBuiltin.cpp | 8 ++ clang/lib/Headers/hlsl/hlsl_intrinsics.h | 110 ++++++++++++++ clang/lib/Sema/SemaChecking.cpp | 15 +- .../CodeGenHLSL/builtins/clamp-builtin.hlsl | 8 ++ clang/test/CodeGenHLSL/builtins/clamp.hlsl | 134 ++++++++++++++++++ .../test/SemaHLSL/BuiltIns/clamp-errors.hlsl | 91 ++++++++++++ llvm/include/llvm/IR/IntrinsicsDirectX.td | 2 +- llvm/lib/Target/DirectX/DXIL.td | 10 ++ .../Target/DirectX/DXILIntrinsicExpansion.cpp | 48 +++++++ llvm/test/CodeGen/DirectX/clamp.ll | 64 +++++++++ llvm/test/CodeGen/DirectX/fmax.ll | 31 ++++ llvm/test/CodeGen/DirectX/fmin.ll | 31 ++++ llvm/test/CodeGen/DirectX/smax.ll | 31 ++++ llvm/test/CodeGen/DirectX/smin.ll | 31 ++++ llvm/test/CodeGen/DirectX/umax.ll | 29 ++-- llvm/test/CodeGen/DirectX/umin.ll | 31 ++++ 17 files changed, 664 insertions(+), 16 deletions(-) create mode 100644 clang/test/CodeGenHLSL/builtins/clamp-builtin.hlsl create mode 100644 clang/test/CodeGenHLSL/builtins/clamp.hlsl create mode 100644 clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl create mode 100644 llvm/test/CodeGen/DirectX/clamp.ll create mode 100644 llvm/test/CodeGen/DirectX/fmax.ll create mode 100644 llvm/test/CodeGen/DirectX/fmin.ll create mode 100644 llvm/test/CodeGen/DirectX/smax.ll create mode 100644 llvm/test/CodeGen/DirectX/smin.ll create mode 100644 llvm/test/CodeGen/DirectX/umin.ll diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 58a2d22e7641fc..64599aaee0ced7 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4554,6 +4554,12 @@ def HLSLWaveActiveCountBits : LangBuiltin<"HLSL_LANG"> { let Prototype = "unsigned int(bool)"; } +def HLSLClamp : LangBuiltin<"HLSL_LANG"> { 
+ let Spellings = ["__builtin_hlsl_elementwise_clamp"]; + let Attributes = [NoThrow, Const]; + let Prototype = "void(...)"; +} + def HLSLCreateHandle : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_create_handle"]; let Attributes = [NoThrow, Const]; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index b09bf563622089..f831694fe9bc23 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -17981,6 +17981,14 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()), Intrinsic::dx_any, ArrayRef<Value *>{Op0}, nullptr, "dx.any"); } + case Builtin::BI__builtin_hlsl_elementwise_clamp: { + Value *OpX = EmitScalarExpr(E->getArg(0)); + Value *OpMin = EmitScalarExpr(E->getArg(1)); + Value *OpMax = EmitScalarExpr(E->getArg(2)); + return Builder.CreateIntrinsic( + /*ReturnType=*/OpX->getType(), Intrinsic::dx_clamp, + ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "dx.clamp"); + } case Builtin::BI__builtin_hlsl_dot: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index 718fb9a9b35c04..5e703772b7ee4f 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -252,6 +252,116 @@ double3 ceil(double3); _HLSL_BUILTIN_ALIAS(__builtin_elementwise_ceil) double4 ceil(double4); +//===----------------------------------------------------------------------===// +// clamp builtins +//===----------------------------------------------------------------------===// + +/// \fn T clamp(T X, T Min, T Max) +/// \brief Clamps the specified value \a X to the specified +/// minimum ( \a Min) and maximum ( \a Max) range. +/// \param X A value to clamp. +/// \param Min The specified minimum range. +/// \param Max The specified maximum range. 
+/// +/// Returns The clamped value for the \a X parameter. +/// For values of -INF or INF, clamp will behave as expected. +/// However for values of NaN, the results are undefined. + +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +half clamp(half, half, half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +half2 clamp(half2, half2, half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +half3 clamp(half3, half3, half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +half4 clamp(half4, half4, half4); + +#ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +int16_t clamp(int16_t, int16_t, int16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +int16_t2 clamp(int16_t2, int16_t2, int16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +int16_t3 clamp(int16_t3, int16_t3, int16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +int16_t4 clamp(int16_t4, int16_t4, int16_t4); + +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +uint16_t clamp(uint16_t, uint16_t, uint16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +uint16_t2 clamp(uint16_t2, uint16_t2, uint16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +uint16_t3 clamp(uint16_t3, uint16_t3, uint16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +uint16_t4 clamp(uint16_t4, uint16_t4, uint16_t4); +#endif + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +int clamp(int, int, int); 
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +int2 clamp(int2, int2, int2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +int3 clamp(int3, int3, int3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +int4 clamp(int4, int4, int4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +uint clamp(uint, uint, uint); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +uint2 clamp(uint2, uint2, uint2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +uint3 clamp(uint3, uint3, uint3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +uint4 clamp(uint4, uint4, uint4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +int64_t clamp(int64_t, int64_t, int64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +int64_t2 clamp(int64_t2, int64_t2, int64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +int64_t3 clamp(int64_t3, int64_t3, int64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +int64_t4 clamp(int64_t4, int64_t4, int64_t4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +uint64_t clamp(uint64_t, uint64_t, uint64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +uint64_t2 clamp(uint64_t2, uint64_t2, uint64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +uint64_t3 clamp(uint64_t3, uint64_t3, uint64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +uint64_t4 clamp(uint64_t4, uint64_t4, uint64_t4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +float clamp(float, float, float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +float2 clamp(float2, float2, float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +float3 clamp(float3, float3, float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +float4 clamp(float4, float4, float4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +double clamp(double, double, double); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +double2 clamp(double2, 
double2, double2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +double3 clamp(double3, double3, double3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) +double4 clamp(double4, double4, double4); + //===----------------------------------------------------------------------===// // cos builtins //===----------------------------------------------------------------------===// diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index d88a38eb6eb97b..c47b47d4670e53 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -5303,6 +5303,17 @@ bool Sema::CheckHLSLBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return true; break; } + case Builtin::BI__builtin_hlsl_elementwise_clamp: { + if (checkArgCount(*this, TheCall, 3)) + return true; + if (CheckVectorElementCallArgs(this, TheCall)) + return true; + if (SemaBuiltinElementwiseTernaryMath( + TheCall, /*CheckForFloatArgs*/ + TheCall->getArg(0)->getType()->hasFloatingRepresentation())) + return true; + break; + } case Builtin::BI__builtin_hlsl_dot: { if (checkArgCount(*this, TheCall, 2)) return true; @@ -5351,7 +5362,9 @@ bool Sema::CheckHLSLBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return true; if (CheckVectorElementCallArgs(this, TheCall)) return true; - if (SemaBuiltinElementwiseTernaryMath(TheCall, /*CheckForFloatArgs*/ false)) + if (SemaBuiltinElementwiseTernaryMath( + TheCall, /*CheckForFloatArgs*/ + TheCall->getArg(0)->getType()->hasFloatingRepresentation())) return true; } } diff --git a/clang/test/CodeGenHLSL/builtins/clamp-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/clamp-builtin.hlsl new file mode 100644 index 00000000000000..e3ef26429e7e40 --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/clamp-builtin.hlsl @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -o - | FileCheck %s + +// 
CHECK-LABEL: builtin_test_clamp_int4 +// CHECK: %dx.clamp = call <4 x i32> @llvm.dx.clamp.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) +// CHECK: ret <4 x i32> %dx.clamp +int4 builtin_test_clamp_int4(int4 p0, int4 p1, int4 p2) { + return __builtin_hlsl_elementwise_clamp(p0, p1, p2); +} diff --git a/clang/test/CodeGenHLSL/builtins/clamp.hlsl b/clang/test/CodeGenHLSL/builtins/clamp.hlsl new file mode 100644 index 00000000000000..3506beb2bd1faa --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/clamp.hlsl @@ -0,0 +1,134 @@ +// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: --check-prefixes=CHECK,NATIVE_HALF +// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF + +#ifdef __HLSL_ENABLE_16_BIT +// NATIVE_HALF: define noundef i16 @ +// NATIVE_HALF: call i16 @llvm.dx.clamp.i16( +int16_t test_clamp_short(int16_t p0, int16_t p1) { return clamp(p0, p1,p1); } +// NATIVE_HALF: define noundef <2 x i16> @ +// NATIVE_HALF: call <2 x i16> @llvm.dx.clamp.v2i16( +int16_t2 test_clamp_short2(int16_t2 p0, int16_t2 p1) { return clamp(p0, p1,p1); } +// NATIVE_HALF: define noundef <3 x i16> @ +// NATIVE_HALF: call <3 x i16> @llvm.dx.clamp.v3i16 +int16_t3 test_clamp_short3(int16_t3 p0, int16_t3 p1) { return clamp(p0, p1,p1); } +// NATIVE_HALF: define noundef <4 x i16> @ +// NATIVE_HALF: call <4 x i16> @llvm.dx.clamp.v4i16 +int16_t4 test_clamp_short4(int16_t4 p0, int16_t4 p1) { return clamp(p0, p1,p1); } + +// NATIVE_HALF: define noundef i16 @ +// NATIVE_HALF: call i16 @llvm.dx.clamp.i16( +uint16_t test_clamp_ushort(uint16_t p0, uint16_t p1) { return clamp(p0, p1,p1); } +// NATIVE_HALF: define noundef <2 x i16> @ +// NATIVE_HALF: call <2 x i16> @llvm.dx.clamp.v2i16 
+uint16_t2 test_clamp_ushort2(uint16_t2 p0, uint16_t2 p1) { return clamp(p0, p1,p1); }
+// NATIVE_HALF: define noundef <3 x i16> @
+// NATIVE_HALF: call <3 x i16> @llvm.dx.clamp.v3i16
+uint16_t3 test_clamp_ushort3(uint16_t3 p0, uint16_t3 p1) { return clamp(p0, p1,p1); }
+// NATIVE_HALF: define noundef <4 x i16> @
+// NATIVE_HALF: call <4 x i16> @llvm.dx.clamp.v4i16
+uint16_t4 test_clamp_ushort4(uint16_t4 p0, uint16_t4 p1) { return clamp(p0, p1,p1); }
+#endif
+
+// CHECK: define noundef i32 @
+// CHECK: call i32 @llvm.dx.clamp.i32(
+int test_clamp_int(int p0, int p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <2 x i32> @
+// CHECK: call <2 x i32> @llvm.dx.clamp.v2i32
+int2 test_clamp_int2(int2 p0, int2 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <3 x i32> @
+// CHECK: call <3 x i32> @llvm.dx.clamp.v3i32
+int3 test_clamp_int3(int3 p0, int3 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <4 x i32> @
+// CHECK: call <4 x i32> @llvm.dx.clamp.v4i32
+int4 test_clamp_int4(int4 p0, int4 p1) { return clamp(p0, p1,p1); }
+
+// CHECK: define noundef i32 @
+// CHECK: call i32 @llvm.dx.clamp.i32(
+uint test_clamp_uint(uint p0, uint p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <2 x i32> @
+// CHECK: call <2 x i32> @llvm.dx.clamp.v2i32
+uint2 test_clamp_uint2(uint2 p0, uint2 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <3 x i32> @
+// CHECK: call <3 x i32> @llvm.dx.clamp.v3i32
+uint3 test_clamp_uint3(uint3 p0, uint3 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <4 x i32> @
+// CHECK: call <4 x i32> @llvm.dx.clamp.v4i32
+uint4 test_clamp_uint4(uint4 p0, uint4 p1) { return clamp(p0, p1,p1); }
+
+// CHECK: define noundef i64 @
+// CHECK: call i64 @llvm.dx.clamp.i64(
+int64_t test_clamp_long(int64_t p0, int64_t p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <2 x i64> @
+// CHECK: call <2 x i64> @llvm.dx.clamp.v2i64
+int64_t2 test_clamp_long2(int64_t2 p0, int64_t2 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <3 x i64> @
+// CHECK: call <3 x i64> @llvm.dx.clamp.v3i64
+int64_t3 test_clamp_long3(int64_t3 p0, int64_t3 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <4 x i64> @
+// CHECK: call <4 x i64> @llvm.dx.clamp.v4i64
+int64_t4 test_clamp_long4(int64_t4 p0, int64_t4 p1) { return clamp(p0, p1,p1); }
+
+// CHECK: define noundef i64 @
+// CHECK: call i64 @llvm.dx.clamp.i64(
+uint64_t test_clamp_ulong(uint64_t p0, uint64_t p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <2 x i64> @
+// CHECK: call <2 x i64> @llvm.dx.clamp.v2i64
+uint64_t2 test_clamp_ulong2(uint64_t2 p0, uint64_t2 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <3 x i64> @
+// CHECK: call <3 x i64> @llvm.dx.clamp.v3i64
+uint64_t3 test_clamp_ulong3(uint64_t3 p0, uint64_t3 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <4 x i64> @
+// CHECK: call <4 x i64> @llvm.dx.clamp.v4i64
+uint64_t4 test_clamp_ulong4(uint64_t4 p0, uint64_t4 p1) { return clamp(p0, p1,p1); }
+
+// NATIVE_HALF: define noundef half @
+// NATIVE_HALF: call half @llvm.dx.clamp.f16(
+// NO_HALF: define noundef float @"?test_clamp_half
+// NO_HALF: call float @llvm.dx.clamp.f32(
+half test_clamp_half(half p0, half p1) { return clamp(p0, p1,p1); }
+// NATIVE_HALF: define noundef <2 x half> @
+// NATIVE_HALF: call <2 x half> @llvm.dx.clamp.v2f16
+// NO_HALF: define noundef <2 x float> @"?test_clamp_half2
+// NO_HALF: call <2 x float> @llvm.dx.clamp.v2f32(
+half2 test_clamp_half2(half2 p0, half2 p1) { return clamp(p0, p1,p1); }
+// NATIVE_HALF: define noundef <3 x half> @
+// NATIVE_HALF: call <3 x half> @llvm.dx.clamp.v3f16
+// NO_HALF: define noundef <3 x float> @"?test_clamp_half3
+// NO_HALF: call <3 x float> @llvm.dx.clamp.v3f32(
+half3 test_clamp_half3(half3 p0, half3 p1) { return clamp(p0, p1,p1); }
+// NATIVE_HALF: define noundef <4 x half> @
+// NATIVE_HALF: call <4 x half> @llvm.dx.clamp.v4f16
+// NO_HALF: define noundef <4 x float> @"?test_clamp_half4
+// NO_HALF: call 
<4 x float> @llvm.dx.clamp.v4f32( +half4 test_clamp_half4(half4 p0, half4 p1) { return clamp(p0, p1,p1); } + +// CHECK: define noundef float @"?test_clamp_float +// CHECK: call float @llvm.dx.clamp.f32( +float test_clamp_float(float p0, float p1) { return clamp(p0, p1,p1); } +// CHECK: define noundef <2 x float> @"?test_clamp_float2 +// CHECK: call <2 x float> @llvm.dx.clamp.v2f32 +float2 test_clamp_float2(float2 p0, float2 p1) { return clamp(p0, p1,p1); } +// CHECK: define noundef <3 x float> @"?test_clamp_float3 +// CHECK: call <3 x float> @llvm.dx.clamp.v3f32 +float3 test_clamp_float3(float3 p0, float3 p1) { return clamp(p0, p1,p1); } +// CHECK: define noundef <4 x float> @"?test_clamp_float4 +// CHECK: call <4 x float> @llvm.dx.clamp.v4f32 +float4 test_clamp_float4(float4 p0, float4 p1) { return clamp(p0, p1,p1); } + +// CHECK: define noundef double @ +// CHECK: call double @llvm.dx.clamp.f64( +double test_clamp_double(double p0, double p1) { return clamp(p0, p1,p1); } +// CHECK: define noundef <2 x double> @ +// CHECK: call <2 x double> @llvm.dx.clamp.v2f64 +double2 test_clamp_double2(double2 p0, double2 p1) { return clamp(p0, p1,p1); } +// CHECK: define noundef <3 x double> @ +// CHECK: call <3 x double> @llvm.dx.clamp.v3f64 +double3 test_clamp_double3(double3 p0, double3 p1) { return clamp(p0, p1,p1); } +// CHECK: define noundef <4 x double> @ +// CHECK: call <4 x double> @llvm.dx.clamp.v4f64 +double4 test_clamp_double4(double4 p0, double4 p1) { return clamp(p0, p1,p1); } diff --git a/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl new file mode 100644 index 00000000000000..4c0e5315ce532e --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl @@ -0,0 +1,91 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -verify -verify-ignore-unexpected + +float2 test_no_second_arg(float2 p0) { + return 
__builtin_hlsl_elementwise_clamp(p0); + // expected-error@-1 {{too few arguments to function call, expected 3, have 1}} +} + +float2 test_no_third_arg(float2 p0) { + return __builtin_hlsl_elementwise_clamp(p0, p0); + // expected-error@-1 {{too few arguments to function call, expected 3, have 2}} +} + +float2 test_too_many_arg(float2 p0) { + return __builtin_hlsl_elementwise_clamp(p0, p0, p0, p0); + // expected-error@-1 {{too many arguments to function call, expected 3, have 4}} +} + +float2 test_clamp_no_second_arg(float2 p0) { + return clamp(p0); + // expected-error@-1 {{no matching function for call to 'clamp'}} +} + +float2 test_clamp_vector_size_mismatch(float3 p0, float2 p1) { + return clamp(p0, p0, p1); + // expected-warning@-1 {{implicit conversion truncates vector: 'float3' (aka 'vector<float, 3>') to 'float __attribute__((ext_vector_type(2)))' (vector of 2 'float' values)}} +} + +float2 test_clamp_builtin_vector_size_mismatch(float3 p0, float2 p1) { + return __builtin_hlsl_elementwise_clamp(p0, p1, p1); + // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must have the same type}} +} + +float test_clamp_scalar_mismatch(float p0, half p1) { + return clamp(p1, p0, p1); + // expected-error@-1 {{call to 'clamp' is ambiguous}} +} + +float2 test_clamp_element_type_mismatch(half2 p0, float2 p1) { + return clamp(p1, p0, p1); + // expected-error@-1 {{call to 'clamp' is ambiguous}} +} + +float2 test_builtin_clamp_float2_splat(float p0, float2 p1) { + return __builtin_hlsl_elementwise_clamp(p0, p1, p1); + // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must be vectors}} +} + +float3 test_builtin_clamp_float3_splat(float p0, float3 p1) { + return __builtin_hlsl_elementwise_clamp(p0, p1, p1); + // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must be vectors}} +} + +float4 test_builtin_clamp_float4_splat(float p0, float4 p1) { + return __builtin_hlsl_elementwise_clamp(p0, p1, p1); + // 
expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must be vectors}} +} + +float2 test_clamp_float2_int_splat(float2 p0, int p1) { + return __builtin_hlsl_elementwise_clamp(p0, p1, p1); + // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must be vectors}} +} + +float3 test_clamp_float3_int_splat(float3 p0, int p1) { + return __builtin_hlsl_elementwise_clamp(p0, p1, p1); + // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must be vectors}} +} + +float2 test_builtin_clamp_int_vect_to_float_vec_promotion(int2 p0, float p1) { + return __builtin_hlsl_elementwise_clamp(p0, p1, p1); + // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must be vectors}} +} + +float test_builtin_clamp_bool_type_promotion(bool p0) { + return __builtin_hlsl_elementwise_clamp(p0, p0, p0); + // expected-error@-1 {{1st argument must be a vector, integer or floating point type (was 'bool')}} +} + +float builtin_bool_to_float_type_promotion(float p0, bool p1) { + return __builtin_hlsl_elementwise_clamp(p0, p0, p1); + // expected-error@-1 {{3rd argument must be a floating point type (was 'bool')}} +} + +float builtin_bool_to_float_type_promotion2(bool p0, float p1) { + return __builtin_hlsl_elementwise_clamp(p1, p0, p1); + // expected-error@-1 {{2nd argument must be a floating point type (was 'bool')}} +} + +float builtin_clamp_int_to_float_promotion(float p0, int p1) { + return __builtin_hlsl_elementwise_clamp(p0, p0, p1); + // expected-error@-1 {{3rd argument must be a floating point type (was 'int')}} +} diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index 00536c71c3e2e5..c717063f7c8581 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -21,7 +21,7 @@ def int_dx_create_handle : ClangBuiltin<"__builtin_hlsl_create_handle">, Intrinsic<[ llvm_ptr_ty ], [llvm_i8_ty], [IntrWillReturn]>; def int_dx_any 
: DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty]>; - +def int_dx_clamp : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; def int_dx_dot : Intrinsic<[LLVMVectorElementType<0>], [llvm_anyvector_ty, LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>], diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index b0e587d2e7eacf..216fa5b10c8f4d 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -274,8 +274,18 @@ def Round : DXILOpMapping<26, unary, int_round, "Returns the input rounded to the nearest integer" "within a floating-point type.", [llvm_halforfloat_ty, LLVMMatchType<0>]>; +def FMax : DXILOpMapping<35, binary, int_maxnum, + "Float maximum. FMax(a,b) = a > b ? a : b">; +def FMin : DXILOpMapping<36, binary, int_minnum, + "Float minimum. FMin(a,b) = a < b ? a : b">; +def SMax : DXILOpMapping<37, binary, int_smax, + "Signed integer maximum. SMax(a,b) = a > b ? a : b">; +def SMin : DXILOpMapping<38, binary, int_smin, + "Signed integer minimum. SMin(a,b) = a < b ? a : b">; def UMax : DXILOpMapping<39, binary, int_umax, "Unsigned integer maximum. UMax(a,b) = a > b ? a : b">; +def UMin : DXILOpMapping<40, binary, int_umin, + "Unsigned integer minimum. UMin(a,b) = a < b ? a : b">; def FMad : DXILOpMapping<46, tertiary, int_fmuladd, "Floating point arithmetic multiply/add operation. 
fmad(m,a,b) = m * a + b.">;
 def IMad : DXILOpMapping<48, tertiary, int_dx_imad,
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index 0461f0490017bf..bd20861ef20d5d 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -35,6 +35,7 @@ static bool isIntrinsicExpansion(Function &F) {
   switch (F.getIntrinsicID()) {
   case Intrinsic::exp:
   case Intrinsic::dx_any:
+  case Intrinsic::dx_clamp:
   case Intrinsic::dx_lerp:
   case Intrinsic::dx_rcp:
     return true;
@@ -132,12 +133,59 @@ static bool expandRcpIntrinsic(CallInst *Orig) {
   return true;
 }
 
+// Picks the max intrinsic that applies the lower bound of a clamp.
+// NOTE: LLVM integer types are signless, so the operand type cannot tell a
+// signed clamp from an unsigned one. IntegerType::getSignBit() is only a bit
+// mask and is non-zero for every width, which left the previous umax branch
+// unreachable; integers are therefore always lowered as signed.
+// FIXME: give unsigned clamp its own intrinsic so that umax can be selected.
+static Intrinsic::IndependentIntrinsics getCorrectMaxIntrinsic(Type *ElemTy) {
+  if (ElemTy->isVectorTy())
+    ElemTy = ElemTy->getScalarType();
+  if (ElemTy->isIntegerTy())
+    return Intrinsic::smax;
+  assert(ElemTy->isFloatingPointTy());
+  return Intrinsic::maxnum;
+}
+
+// Picks the min intrinsic that applies the upper bound of a clamp. Integers
+// are always lowered as signed; see the note on getCorrectMaxIntrinsic.
+static Intrinsic::IndependentIntrinsics getCorrectMinIntrinsic(Type *ElemTy) {
+  if (ElemTy->isVectorTy())
+    ElemTy = ElemTy->getScalarType();
+  if (ElemTy->isIntegerTy())
+    return Intrinsic::smin;
+  assert(ElemTy->isFloatingPointTy());
+  return Intrinsic::minnum;
+}
+
+// Expands dx.clamp(X, Min, Max) into min(max(X, Min), Max), built from the
+// generic max/min intrinsics chosen by the helpers above.
+static bool expandClampIntrinsic(CallInst *Orig) {
+  Value *X = Orig->getOperand(0);
+  Value *Min = Orig->getOperand(1);
+  Value *Max = Orig->getOperand(2);
+  Type *Ty = X->getType();
+  IRBuilder<> Builder(Orig->getParent());
+  Builder.SetInsertPoint(Orig);
+  auto *MaxCall = Builder.CreateIntrinsic(Ty, getCorrectMaxIntrinsic(Ty),
+                                          {X, Min}, nullptr, "dx.max");
+  auto *MinCall = Builder.CreateIntrinsic(Ty, getCorrectMinIntrinsic(Ty),
+                                          {MaxCall, Max}, nullptr, "dx.min");
+
+  Orig->replaceAllUsesWith(MinCall);
+  Orig->eraseFromParent();
+  return true;
+}
+
 static bool expandIntrinsic(Function &F, CallInst *Orig) {
   switch (F.getIntrinsicID()) {
   case Intrinsic::exp:
     return expandExpIntrinsic(Orig);
   case Intrinsic::dx_any:
     return expandAnyIntrinsic(Orig);
+  case Intrinsic::dx_clamp:
+    return expandClampIntrinsic(Orig);
   case Intrinsic::dx_lerp:
     return expandLerpIntrinsic(Orig);
   case Intrinsic::dx_rcp:
diff --git a/llvm/test/CodeGen/DirectX/clamp.ll b/llvm/test/CodeGen/DirectX/clamp.ll
new file mode 100644
index 00000000000000..8c7d845eb00af9
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/clamp.ll
@@ -0,0 +1,64 @@
+; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+
+; Make sure dxil operation function calls for clamp are generated for i16/i32/i64 and half/float/double.
+
+; CHECK-LABEL:test_clamp_i16
+define noundef i16 @test_clamp_i16(i16 noundef %a, i16 noundef %b, i16 noundef %c) {
+entry:
+; CHECK: call i16 @dx.op.binary.i16(i32 37, i16 %{{.*}}, i16 %{{.*}})
+; CHECK: call i16 @dx.op.binary.i16(i32 38, i16 %{{.*}}, i16 %{{.*}})
+  %0 = call i16 @llvm.dx.clamp.i16(i16 %a, i16 %b, i16 %c)
+  ret i16 %0
+}
+
+; CHECK-LABEL:test_clamp_i32
+define noundef i32 @test_clamp_i32(i32 noundef %a, i32 noundef %b, i32 noundef %c) {
+entry:
+; CHECK: call i32 @dx.op.binary.i32(i32 37, i32 %{{.*}}, i32 %{{.*}})
+; CHECK: call i32 @dx.op.binary.i32(i32 38, i32 %{{.*}}, i32 %{{.*}})
+  %0 = call i32 @llvm.dx.clamp.i32(i32 %a, i32 %b, i32 %c)
+  ret i32 %0
+}
+
+; CHECK-LABEL:test_clamp_i64
+define noundef i64 @test_clamp_i64(i64 noundef %a, i64 noundef %b, i64 noundef %c) {
+entry:
+; CHECK: call i64 @dx.op.binary.i64(i32 37, i64 %a, i64 %b)
+; CHECK: call i64 @dx.op.binary.i64(i32 38, i64 %{{.*}}, i64 %c)
+  %0 = call i64 @llvm.dx.clamp.i64(i64 %a, i64 %b, i64 %c)
+  ret i64 %0
+}
+
+; CHECK-LABEL:test_clamp_half
+define noundef half @test_clamp_half(half noundef %a, half noundef %b, half noundef %c) {
+entry:
+; CHECK: call half @dx.op.binary.f16(i32 35, half %{{.*}}, half 
%{{.*}}) +; CHECK: call half @dx.op.binary.f16(i32 36, half %{{.*}}, half %{{.*}}) + %0 = call half @llvm.dx.clamp.f16(half %a, half %b, half %c) + ret half %0 +} + +; CHECK-LABEL:test_clamp_float +define noundef float @test_clamp_float(float noundef %a, float noundef %b, float noundef %c) { +entry: +; CHECK: call float @dx.op.binary.f32(i32 35, float %{{.*}}, float %{{.*}}) +; CHECK: call float @dx.op.binary.f32(i32 36, float %{{.*}}, float %{{.*}}) + %0 = call float @llvm.dx.clamp.f32(float %a, float %b, float %c) + ret float %0 +} + +; CHECK-LABEL:test_clamp_double +define noundef double @test_clamp_double(double noundef %a, double noundef %b, double noundef %c) { +entry: +; CHECK: call double @dx.op.binary.f64(i32 35, double %{{.*}}, double %{{.*}}) +; CHECK: call double @dx.op.binary.f64(i32 36, double %{{.*}}, double %{{.*}}) + %0 = call double @llvm.dx.clamp.f64(double %a, double %b, double %c) + ret double %0 +} + +declare half @llvm.dx.clamp.f16(half, half, half) +declare float @llvm.dx.clamp.f32(float, float, float) +declare double @llvm.dx.clamp.f64(double, double, double) +declare i16 @llvm.dx.clamp.i16(i16, i16, i16) +declare i32 @llvm.dx.clamp.i32(i32, i32, i32) +declare i64 @llvm.dx.clamp.i64(i64, i64, i64) diff --git a/llvm/test/CodeGen/DirectX/fmax.ll b/llvm/test/CodeGen/DirectX/fmax.ll new file mode 100644 index 00000000000000..aff722c29309c0 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/fmax.ll @@ -0,0 +1,31 @@ +; RUN: opt -S -dxil-op-lower < %s | FileCheck %s + +; Make sure dxil operation function calls for fmax are generated for half/float/double. 
+ +; CHECK-LABEL:test_fmax_half +define noundef half @test_fmax_half(half noundef %a, half noundef %b) { +entry: +; CHECK: call half @dx.op.binary.f16(i32 35, half %{{.*}}, half %{{.*}}) + %0 = call half @llvm.maxnum.f16(half %a, half %b) + ret half %0 +} + +; CHECK-LABEL:test_fmax_float +define noundef float @test_fmax_float(float noundef %a, float noundef %b) { +entry: +; CHECK: call float @dx.op.binary.f32(i32 35, float %{{.*}}, float %{{.*}}) + %0 = call float @llvm.maxnum.f32(float %a, float %b) + ret float %0 +} + +; CHECK-LABEL:test_fmax_double +define noundef double @test_fmax_double(double noundef %a, double noundef %b) { +entry: +; CHECK: call double @dx.op.binary.f64(i32 35, double %{{.*}}, double %{{.*}}) + %0 = call double @llvm.maxnum.f64(double %a, double %b) + ret double %0 +} + +declare half @llvm.maxnum.f16(half, half) +declare float @llvm.maxnum.f32(float, float) +declare double @llvm.maxnum.f64(double, double) diff --git a/llvm/test/CodeGen/DirectX/fmin.ll b/llvm/test/CodeGen/DirectX/fmin.ll new file mode 100644 index 00000000000000..2f7c209f0278ae --- /dev/null +++ b/llvm/test/CodeGen/DirectX/fmin.ll @@ -0,0 +1,31 @@ +; RUN: opt -S -dxil-op-lower < %s | FileCheck %s + +; Make sure dxil operation function calls for fmin are generated for half/float/double. 
+ +; CHECK-LABEL:test_fmin_half +define noundef half @test_fmin_half(half noundef %a, half noundef %b) { +entry: +; CHECK: call half @dx.op.binary.f16(i32 36, half %{{.*}}, half %{{.*}}) + %0 = call half @llvm.minnum.f16(half %a, half %b) + ret half %0 +} + +; CHECK-LABEL:test_fmin_float +define noundef float @test_fmin_float(float noundef %a, float noundef %b) { +entry: +; CHECK: call float @dx.op.binary.f32(i32 36, float %{{.*}}, float %{{.*}}) + %0 = call float @llvm.minnum.f32(float %a, float %b) + ret float %0 +} + +; CHECK-LABEL:test_fmin_double +define noundef double @test_fmin_double(double noundef %a, double noundef %b) { +entry: +; CHECK: call double @dx.op.binary.f64(i32 36, double %{{.*}}, double %{{.*}}) + %0 = call double @llvm.minnum.f64(double %a, double %b) + ret double %0 +} + +declare half @llvm.minnum.f16(half, half) +declare float @llvm.minnum.f32(float, float) +declare double @llvm.minnum.f64(double, double) diff --git a/llvm/test/CodeGen/DirectX/smax.ll b/llvm/test/CodeGen/DirectX/smax.ll new file mode 100644 index 00000000000000..8b2406782c0938 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/smax.ll @@ -0,0 +1,31 @@ +; RUN: opt -S -dxil-op-lower < %s | FileCheck %s + +; Make sure dxil operation function calls for smax are generated for i16/i32/i64. 
+ +; CHECK-LABEL:test_smax_i16 +define noundef i16 @test_smax_i16(i16 noundef %a, i16 noundef %b) { +entry: +; CHECK: call i16 @dx.op.binary.i16(i32 37, i16 %{{.*}}, i16 %{{.*}}) + %0 = call i16 @llvm.smax.i16(i16 %a, i16 %b) + ret i16 %0 +} + +; CHECK-LABEL:test_smax_i32 +define noundef i32 @test_smax_i32(i32 noundef %a, i32 noundef %b) { +entry: +; CHECK: call i32 @dx.op.binary.i32(i32 37, i32 %{{.*}}, i32 %{{.*}}) + %0 = call i32 @llvm.smax.i32(i32 %a, i32 %b) + ret i32 %0 +} + +; CHECK-LABEL:test_smax_i64 +define noundef i64 @test_smax_i64(i64 noundef %a, i64 noundef %b) { +entry: +; CHECK: call i64 @dx.op.binary.i64(i32 37, i64 %{{.*}}, i64 %{{.*}}) + %0 = call i64 @llvm.smax.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i16 @llvm.smax.i16(i16, i16) +declare i32 @llvm.smax.i32(i32, i32) +declare i64 @llvm.smax.i64(i64, i64) diff --git a/llvm/test/CodeGen/DirectX/smin.ll b/llvm/test/CodeGen/DirectX/smin.ll new file mode 100644 index 00000000000000..b2b40a1b624335 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/smin.ll @@ -0,0 +1,31 @@ +; RUN: opt -S -dxil-op-lower < %s | FileCheck %s + +; Make sure dxil operation function calls for smin are generated for i16/i32/i64. 
+ +; CHECK-LABEL:test_smin_i16 +define noundef i16 @test_smin_i16(i16 noundef %a, i16 noundef %b) { +entry: +; CHECK: call i16 @dx.op.binary.i16(i32 38, i16 %{{.*}}, i16 %{{.*}}) + %0 = call i16 @llvm.smin.i16(i16 %a, i16 %b) + ret i16 %0 +} + +; CHECK-LABEL:test_smin_i32 +define noundef i32 @test_smin_i32(i32 noundef %a, i32 noundef %b) { +entry: +; CHECK: call i32 @dx.op.binary.i32(i32 38, i32 %{{.*}}, i32 %{{.*}}) + %0 = call i32 @llvm.smin.i32(i32 %a, i32 %b) + ret i32 %0 +} + +; CHECK-LABEL:test_smin_i64 +define noundef i64 @test_smin_i64(i64 noundef %a, i64 noundef %b) { +entry: +; CHECK: call i64 @dx.op.binary.i64(i32 38, i64 %{{.*}}, i64 %{{.*}}) + %0 = call i64 @llvm.smin.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i16 @llvm.smin.i16(i16, i16) +declare i32 @llvm.smin.i32(i32, i32) +declare i64 @llvm.smin.i64(i64, i64) diff --git a/llvm/test/CodeGen/DirectX/umax.ll b/llvm/test/CodeGen/DirectX/umax.ll index c7b6a87599279a..be0f557fc8da69 100644 --- a/llvm/test/CodeGen/DirectX/umax.ll +++ b/llvm/test/CodeGen/DirectX/umax.ll @@ -1,30 +1,31 @@ ; RUN: opt -S -dxil-op-lower < %s | FileCheck %s -; Make sure dxil operation function calls for umax are generated for i32/i64. +; Make sure dxil operation function calls for umax are generated for i16/i32/i64. 
-target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" -target triple = "dxil-pc-shadermodel6.7-library" +; CHECK-LABEL:test_umax_i16 +define noundef i16 @test_umax_i16(i16 noundef %a, i16 noundef %b) { +entry: +; CHECK: call i16 @dx.op.binary.i16(i32 39, i16 %{{.*}}, i16 %{{.*}}) + %0 = call i16 @llvm.umax.i16(i16 %a, i16 %b) + ret i16 %0 +} ; CHECK-LABEL:test_umax_i32 -; Function Attrs: noinline nounwind optnone -define noundef i32 @test_umax_i32(i32 noundef %a, i32 noundef %b) #0 { +define noundef i32 @test_umax_i32(i32 noundef %a, i32 noundef %b) { entry: -; CHECK:call i32 @dx.op.binary.i32(i32 39, i32 %{{.*}}, i32 %{{.*}}) +; CHECK: call i32 @dx.op.binary.i32(i32 39, i32 %{{.*}}, i32 %{{.*}}) %0 = call i32 @llvm.umax.i32(i32 %a, i32 %b) ret i32 %0 } ; CHECK-LABEL:test_umax_i64 -define noundef i64 @test_umax_i64(i64 noundef %a, i64 noundef %b) #0 { +define noundef i64 @test_umax_i64(i64 noundef %a, i64 noundef %b) { entry: -; CHECK:call i64 @dx.op.binary.i64(i32 39, i64 %{{.*}}, i64 %{{.*}}) +; CHECK: call i64 @dx.op.binary.i64(i32 39, i64 %{{.*}}, i64 %{{.*}}) %0 = call i64 @llvm.umax.i64(i64 %a, i64 %b) ret i64 %0 } -; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn -declare i32 @llvm.umax.i32(i32, i32) #1 -declare i64 @llvm.umax.i64(i64, i64) #1 - -attributes #0 = { noinline nounwind } -attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn } +declare i16 @llvm.umax.i16(i16, i16) +declare i32 @llvm.umax.i32(i32, i32) +declare i64 @llvm.umax.i64(i64, i64) diff --git a/llvm/test/CodeGen/DirectX/umin.ll b/llvm/test/CodeGen/DirectX/umin.ll new file mode 100644 index 00000000000000..5051c711744892 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/umin.ll @@ -0,0 +1,31 @@ +; RUN: opt -S -dxil-op-lower < %s | FileCheck %s + +; Make sure dxil operation function calls for umin are generated for i16/i32/i64. 
+ +; CHECK-LABEL:test_umin_i16 +define noundef i16 @test_umin_i16(i16 noundef %a, i16 noundef %b) { +entry: +; CHECK: call i16 @dx.op.binary.i16(i32 40, i16 %{{.*}}, i16 %{{.*}}) + %0 = call i16 @llvm.umin.i16(i16 %a, i16 %b) + ret i16 %0 +} + +; CHECK-LABEL:test_umin_i32 +define noundef i32 @test_umin_i32(i32 noundef %a, i32 noundef %b) { +entry: +; CHECK: call i32 @dx.op.binary.i32(i32 40, i32 %{{.*}}, i32 %{{.*}}) + %0 = call i32 @llvm.umin.i32(i32 %a, i32 %b) + ret i32 %0 +} + +; CHECK-LABEL:test_umin_i64 +define noundef i64 @test_umin_i64(i64 noundef %a, i64 noundef %b) { +entry: +; CHECK: call i64 @dx.op.binary.i64(i32 40, i64 %{{.*}}, i64 %{{.*}}) + %0 = call i64 @llvm.umin.i64(i64 %a, i64 %b) + ret i64 %0 +} + +declare i16 @llvm.umin.i16(i16, i16) +declare i32 @llvm.umin.i32(i32, i32) +declare i64 @llvm.umin.i64(i64, i64) >From afe3e6dce778ae440d1ec51753e4982d4ab9b60b Mon Sep 17 00:00:00 2001 From: Farzon Lotfi <farzonlo...@microsoft.com> Date: Fri, 15 Mar 2024 02:30:54 -0400 Subject: [PATCH 2/3] [HLSL] implement `clamp` intrinsic closes #70071 - `CGBuiltin.cpp` - Add the unsigned\generic clamp intrinsic emitter. - `IntrinsicsDirectX.td` - add the `dx.clamp` & `dx.uclamp` intrinsics - `DXILIntrinsicExpansion.cpp` - add the `clamp` instruction expansion while maintaining vector form. - `SemaChecking.cpp` - Add `clamp` builtin Sema Checks. - `Builtins.td` - add a `clamp` builtin - `hlsl_intrinsics.h` - add the `clamp` api Why `clamp` as instruction expansion for DXIL? 1. SPIR-V has a GLSL `clamp` extension via: - [FClamp](https://registry.khronos.org/SPIR-V/specs/1.0/GLSL.std.450.html#FClamp) - [UClamp](https://registry.khronos.org/SPIR-V/specs/1.0/GLSL.std.450.html#UClamp) - [SClamp](https://registry.khronos.org/SPIR-V/specs/1.0/GLSL.std.450.html#SClamp) 2. 
Further, Clamp lowers to `min(max( x, min_range ), max_range)`, for which we have float, signed, and unsigned DXIL ops. Commit-specific changes: make changes to support calling unsigned DXIL ops --- clang/lib/CodeGen/CGBuiltin.cpp | 9 ++- clang/test/CodeGenHLSL/builtins/clamp.hlsl | 24 +++--- clang/test/SemaHLSL/BuiltIns/mad-errors.hlsl | 6 +- llvm/include/llvm/IR/IntrinsicsDirectX.td | 3 +- .../Target/DirectX/DXILIntrinsicExpansion.cpp | 48 ++++++------ llvm/test/CodeGen/DirectX/clamp-vec.ll | 74 +++++++++++++++++++ llvm/test/CodeGen/DirectX/clamp.ll | 32 +++++++- 7 files changed, 155 insertions(+), 41 deletions(-) create mode 100644 llvm/test/CodeGen/DirectX/clamp-vec.ll diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index f831694fe9bc23..77fb1bc0f4eb3a 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -17985,8 +17985,15 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, Value *OpX = EmitScalarExpr(E->getArg(0)); Value *OpMin = EmitScalarExpr(E->getArg(1)); Value *OpMax = EmitScalarExpr(E->getArg(2)); + + QualType Ty = E->getArg(0)->getType(); + bool IsUnsigned = false; + if (auto *VecTy = Ty->getAs<VectorType>()) + Ty = VecTy->getElementType(); + IsUnsigned = Ty->isUnsignedIntegerType(); return Builder.CreateIntrinsic( - /*ReturnType=*/OpX->getType(), Intrinsic::dx_clamp, + /*ReturnType=*/OpX->getType(), + IsUnsigned ?
Intrinsic::dx_uclamp : Intrinsic::dx_clamp, ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "dx.clamp"); } case Builtin::BI__builtin_hlsl_dot: { diff --git a/clang/test/CodeGenHLSL/builtins/clamp.hlsl b/clang/test/CodeGenHLSL/builtins/clamp.hlsl index 3506beb2bd1faa..029e48ffe25865 100644 --- a/clang/test/CodeGenHLSL/builtins/clamp.hlsl +++ b/clang/test/CodeGenHLSL/builtins/clamp.hlsl @@ -21,16 +21,16 @@ int16_t3 test_clamp_short3(int16_t3 p0, int16_t3 p1) { return clamp(p0, p1,p1); int16_t4 test_clamp_short4(int16_t4 p0, int16_t4 p1) { return clamp(p0, p1,p1); } // NATIVE_HALF: define noundef i16 @ -// NATIVE_HALF: call i16 @llvm.dx.clamp.i16( +// NATIVE_HALF: call i16 @llvm.dx.uclamp.i16( uint16_t test_clamp_ushort(uint16_t p0, uint16_t p1) { return clamp(p0, p1,p1); } // NATIVE_HALF: define noundef <2 x i16> @ -// NATIVE_HALF: call <2 x i16> @llvm.dx.clamp.v2i16 +// NATIVE_HALF: call <2 x i16> @llvm.dx.uclamp.v2i16 uint16_t2 test_clamp_ushort2(uint16_t2 p0, uint16_t2 p1) { return clamp(p0, p1,p1); } // NATIVE_HALF: define noundef <3 x i16> @ -// NATIVE_HALF: call <3 x i16> @llvm.dx.clamp.v3i16 +// NATIVE_HALF: call <3 x i16> @llvm.dx.uclamp.v3i16 uint16_t3 test_clamp_ushort3(uint16_t3 p0, uint16_t3 p1) { return clamp(p0, p1,p1); } // NATIVE_HALF: define noundef <4 x i16> @ -// NATIVE_HALF: call <4 x i16> @llvm.dx.clamp.v4i16 +// NATIVE_HALF: call <4 x i16> @llvm.dx.uclamp.v4i16 uint16_t4 test_clamp_ushort4(uint16_t4 p0, uint16_t4 p1) { return clamp(p0, p1,p1); } #endif @@ -48,16 +48,16 @@ int3 test_clamp_int3(int3 p0, int3 p1) { return clamp(p0, p1,p1); } int4 test_clamp_int4(int4 p0, int4 p1) { return clamp(p0, p1,p1); } // CHECK: define noundef i32 @ -// CHECK: call i32 @llvm.dx.clamp.i32( +// CHECK: call i32 @llvm.dx.uclamp.i32( int test_clamp_uint(uint p0, uint p1) { return clamp(p0, p1,p1); } // CHECK: define noundef <2 x i32> @ -// CHECK: call <2 x i32> @llvm.dx.clamp.v2i32 +// CHECK: call <2 x i32> @llvm.dx.uclamp.v2i32 uint2 test_clamp_uint2(uint2 p0, 
uint2 p1) { return clamp(p0, p1,p1); } // CHECK: define noundef <3 x i32> @ -// CHECK: call <3 x i32> @llvm.dx.clamp.v3i32 +// CHECK: call <3 x i32> @llvm.dx.uclamp.v3i32 uint3 test_clamp_uint3(uint3 p0, uint3 p1) { return clamp(p0, p1,p1); } // CHECK: define noundef <4 x i32> @ -// CHECK: call <4 x i32> @llvm.dx.clamp.v4i32 +// CHECK: call <4 x i32> @llvm.dx.uclamp.v4i32 uint4 test_clamp_uint4(uint4 p0, uint4 p1) { return clamp(p0, p1,p1); } // CHECK: define noundef i64 @ @@ -74,16 +74,16 @@ int64_t3 test_clamp_long3(int64_t3 p0, int64_t3 p1) { return clamp(p0, p1,p1); } int64_t4 test_clamp_long4(int64_t4 p0, int64_t4 p1) { return clamp(p0, p1,p1); } // CHECK: define noundef i64 @ -// CHECK: call i64 @llvm.dx.clamp.i64( +// CHECK: call i64 @llvm.dx.uclamp.i64( uint64_t test_clamp_long(uint64_t p0, uint64_t p1) { return clamp(p0, p1,p1); } // CHECK: define noundef <2 x i64> @ -// CHECK: call <2 x i64> @llvm.dx.clamp.v2i64 +// CHECK: call <2 x i64> @llvm.dx.uclamp.v2i64 uint64_t2 test_clamp_long2(uint64_t2 p0, uint64_t2 p1) { return clamp(p0, p1,p1); } // CHECK: define noundef <3 x i64> @ -// CHECK: call <3 x i64> @llvm.dx.clamp.v3i64 +// CHECK: call <3 x i64> @llvm.dx.uclamp.v3i64 uint64_t3 test_clamp_long3(uint64_t3 p0, uint64_t3 p1) { return clamp(p0, p1,p1); } // CHECK: define noundef <4 x i64> @ -// CHECK: call <4 x i64> @llvm.dx.clamp.v4i64 +// CHECK: call <4 x i64> @llvm.dx.uclamp.v4i64 uint64_t4 test_clamp_long4(uint64_t4 p0, uint64_t4 p1) { return clamp(p0, p1,p1); } // NATIVE_HALF: define noundef half @ diff --git a/clang/test/SemaHLSL/BuiltIns/mad-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/mad-errors.hlsl index 0b6843591455bd..97ce931bf1b5b5 100644 --- a/clang/test/SemaHLSL/BuiltIns/mad-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/mad-errors.hlsl @@ -72,15 +72,15 @@ float2 test_builtin_mad_int_vect_to_float_vec_promotion(int2 p0, float p1) { float builtin_bool_to_float_type_promotion(float p0, bool p1) { return __builtin_hlsl_mad(p0, p0, p1); - // 
expected-error@-1 {{3rd argument must be a vector, integer or floating point type (was 'bool')}} + // expected-error@-1 {{3rd argument must be a floating point type (was 'bool')}} } float builtin_bool_to_float_type_promotion2(bool p0, float p1) { return __builtin_hlsl_mad(p1, p0, p1); - // expected-error@-1 {{2nd argument must be a vector, integer or floating point type (was 'bool')}} + // expected-error@-1 {{2nd argument must be a floating point type (was 'bool')}} } float builtin_mad_int_to_float_promotion(float p0, int p1) { return __builtin_hlsl_mad(p0, p0, p1); - // expected-error@-1 {{arguments are of different types ('double' vs 'int')}} + // expected-error@-1 {{3rd argument must be a floating point type (was 'int')}} } diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index c717063f7c8581..5c72f06f96ed12 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -21,7 +21,8 @@ def int_dx_create_handle : ClangBuiltin<"__builtin_hlsl_create_handle">, Intrinsic<[ llvm_ptr_ty ], [llvm_i8_ty], [IntrWillReturn]>; def int_dx_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty]>; -def int_dx_clamp : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; +def int_dx_clamp : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; +def int_dx_uclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; def int_dx_dot : Intrinsic<[LLVMVectorElementType<0>], [llvm_anyvector_ty, LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>], diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp index bd20861ef20d5d..441111a7ce7ca9 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp @@ -36,6 +36,7 @@ static bool 
isIntrinsicExpansion(Function &F) { case Intrinsic::exp: case Intrinsic::dx_any: case Intrinsic::dx_clamp: + case Intrinsic::dx_uclamp: case Intrinsic::dx_lerp: case Intrinsic::dx_rcp: return true; @@ -133,35 +134,33 @@ static bool expandRcpIntrinsic(CallInst *Orig) { return true; } -static Intrinsic::IndependentIntrinsics -getCorrectMaxIntrinsic(Type *elemTy) { - if(elemTy->isVectorTy()) - elemTy = elemTy->getScalarType(); - if (elemTy->isIntegerTy()) { - const llvm::IntegerType* intType = llvm::cast<llvm::IntegerType>(elemTy); - if (intType->getSignBit()) - return Intrinsic::smax; +static Intrinsic::ID getCorrectMaxIntrinsic(Type *elemTy, + Intrinsic::ID clampIntrinsic) { + if (clampIntrinsic == Intrinsic::dx_uclamp) return Intrinsic::umax; - } + assert(clampIntrinsic == Intrinsic::dx_clamp); + if (elemTy->isVectorTy()) + elemTy = elemTy->getScalarType(); + if (elemTy->isIntegerTy()) + return Intrinsic::smax; assert(elemTy->isFloatingPointTy()); return Intrinsic::maxnum; } -static Intrinsic::IndependentIntrinsics -getCorrectMinIntrinsic(Type *elemTy) { - if(elemTy->isVectorTy()) - elemTy = elemTy->getScalarType(); - if (elemTy->isIntegerTy()) { - const llvm::IntegerType* intType = llvm::cast<llvm::IntegerType>(elemTy); - if (intType->getSignBit()) - return Intrinsic::smin; +static Intrinsic::ID getCorrectMinIntrinsic(Type *elemTy, + Intrinsic::ID clampIntrinsic) { + if (clampIntrinsic == Intrinsic::dx_uclamp) return Intrinsic::umin; - } + assert(clampIntrinsic == Intrinsic::dx_clamp); + if (elemTy->isVectorTy()) + elemTy = elemTy->getScalarType(); + if (elemTy->isIntegerTy()) + return Intrinsic::smin; assert(elemTy->isFloatingPointTy()); return Intrinsic::minnum; } -static bool expandClampIntrinsic(CallInst *Orig) { +static bool expandClampIntrinsic(CallInst *Orig, Intrinsic::ID clampIntrinsic) { Value *X = Orig->getOperand(0); Value *Min = Orig->getOperand(1); Value *Max = Orig->getOperand(2); @@ -169,10 +168,12 @@ static bool expandClampIntrinsic(CallInst 
*Orig) { IRBuilder<> Builder(Orig->getParent()); Builder.SetInsertPoint(Orig); auto *MaxCall = - Builder.CreateIntrinsic(Ty, getCorrectMaxIntrinsic(Ty), {X, Min}, nullptr, "dx.max"); + Builder.CreateIntrinsic(Ty, getCorrectMaxIntrinsic(Ty, clampIntrinsic), + {X, Min}, nullptr, "dx.max"); auto *MinCall = - Builder.CreateIntrinsic(Ty, getCorrectMinIntrinsic(Ty), {MaxCall, Max}, nullptr, "dx.min"); - + Builder.CreateIntrinsic(Ty, getCorrectMinIntrinsic(Ty, clampIntrinsic), + {MaxCall, Max}, nullptr, "dx.min"); + Orig->replaceAllUsesWith(MinCall); Orig->eraseFromParent(); return true; @@ -184,8 +185,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) { return expandExpIntrinsic(Orig); case Intrinsic::dx_any: return expandAnyIntrinsic(Orig); + case Intrinsic::dx_uclamp: case Intrinsic::dx_clamp: - return expandClampIntrinsic(Orig); + return expandClampIntrinsic(Orig, F.getIntrinsicID()); case Intrinsic::dx_lerp: return expandLerpIntrinsic(Orig); case Intrinsic::dx_rcp: diff --git a/llvm/test/CodeGen/DirectX/clamp-vec.ll b/llvm/test/CodeGen/DirectX/clamp-vec.ll new file mode 100644 index 00000000000000..d4f33a18b71573 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/clamp-vec.ll @@ -0,0 +1,74 @@ +; RUN: opt -S -dxil-intrinsic-expansion < %s | FileCheck %s + +; Make sure dxil operation function calls for clamp are generated for float/int/uint vectors. 
+ +; CHECK-LABEL: clamp_half3 +define noundef <3 x half> @clamp_half3(<3 x half> noundef %a, <3 x half> noundef %b, <3 x half> noundef %c) { +entry: + ; CHECK: call <3 x half> @llvm.maxnum.v3f16(<3 x half> %a, <3 x half> %b) + ; CHECK: call <3 x half> @llvm.minnum.v3f16(<3 x half> %{{.*}}, <3 x half> %c) + %dx.clamp = call <3 x half> @llvm.dx.clamp.v3f16(<3 x half> %a, <3 x half> %b, <3 x half> %c) + ret <3 x half> %dx.clamp +} + +; CHECK-LABEL: clamp_float4 +define noundef <4 x float> @clamp_float4(<4 x float> noundef %a, <4 x float> noundef %b, <4 x float> noundef %c) { +entry: + ; CHECK: call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b) + ; CHECK: call <4 x float> @llvm.minnum.v4f32(<4 x float> %{{.*}}, <4 x float> %c) + %dx.clamp = call <4 x float> @llvm.dx.clamp.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) + ret <4 x float> %dx.clamp +} + +; CHECK-LABEL: clamp_double2 +define noundef <2 x double> @clamp_double2(<2 x double> noundef %a, <2 x double> noundef %b, <2 x double> noundef %c) { +entry: + ; CHECK: call <2 x double> @llvm.maxnum.v2f64(<2 x double> %a, <2 x double> %b) + ; CHECK: call <2 x double> @llvm.minnum.v2f64(<2 x double> %{{.*}}, <2 x double> %c) + %dx.clamp = call <2 x double> @llvm.dx.clamp.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) + ret <2 x double> %dx.clamp +} + +; CHECK-LABEL: clamp_int4 +define noundef <4 x i32> @clamp_int4(<4 x i32> noundef %a, <4 x i32> noundef %b, <4 x i32> noundef %c) { +entry: + ; CHECK: call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> %b) + ; CHECK: call <4 x i32> @llvm.smin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %c) + %dx.clamp = call <4 x i32> @llvm.dx.clamp.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) + ret <4 x i32> %dx.clamp +} + +; CHECK-LABEL: clamp_uint16_t3 +define noundef <3 x i16> @clamp_uint16_t3(<3 x i16> noundef %a, <3 x i16> noundef %b, <3 x i16> noundef %c) { +entry: + ; CHECK: call <3 x i16> @llvm.umax.v3i16(<3 x i16> %a, <3 x i16> %b) + ; CHECK: call 
<3 x i16> @llvm.umin.v3i16(<3 x i16> %{{.*}}, <3 x i16> %c) + %dx.clamp = call <3 x i16> @llvm.dx.uclamp.v3i16(<3 x i16> %a, <3 x i16> %b, <3 x i16> %c) + ret <3 x i16> %dx.clamp +} + +; CHECK-LABEL: clamp_uint4 +define noundef <4 x i32> @clamp_uint4(<4 x i32> noundef %a, <4 x i32> noundef %b, <4 x i32> noundef %c) { +entry: + ; CHECK: call <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> %b) + ; CHECK: call <4 x i32> @llvm.umin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %c) + %dx.clamp = call <4 x i32> @llvm.dx.uclamp.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) + ret <4 x i32> %dx.clamp +} + +; CHECK-LABEL: clamp_uint64_t4 +define noundef <2 x i64> @clamp_uint64_t4(<2 x i64> noundef %a, <2 x i64> noundef %b, <2 x i64> noundef %c) { +entry: + ; CHECK: call <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> %b) + ; CHECK: call <2 x i64> @llvm.umin.v2i64(<2 x i64> %{{.*}}, <2 x i64> %c) + %dx.clamp = call <2 x i64> @llvm.dx.uclamp.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) + ret <2 x i64> %dx.clamp +} + +declare <3 x half> @llvm.dx.clamp.v3f16(<3 x half>, <3 x half>, <3 x half>) +declare <4 x float> @llvm.dx.clamp.v4f32(<4 x float>, <4 x float>, <4 x float>) +declare <2 x double> @llvm.dx.clamp.v2f64(<2 x double>, <2 x double>, <2 x double>) +declare <4 x i32> @llvm.dx.clamp.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) +declare <3 x i16> @llvm.dx.uclamp.v3i32(<3 x i16>, <3 x i32>, <3 x i16>) +declare <4 x i32> @llvm.dx.uclamp.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) +declare <2 x i64> @llvm.dx.uclamp.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) diff --git a/llvm/test/CodeGen/DirectX/clamp.ll b/llvm/test/CodeGen/DirectX/clamp.ll index 8c7d845eb00af9..f122313b8d7dcc 100644 --- a/llvm/test/CodeGen/DirectX/clamp.ll +++ b/llvm/test/CodeGen/DirectX/clamp.ll @@ -1,6 +1,6 @@ ; RUN: opt -S -dxil-op-lower < %s | FileCheck %s -; Make sure dxil operation function calls for clamp are generated for i16/i32/i64. 
+; Make sure dxil operation function calls for clamp/uclamp are generated for half/float/double/i16/i32/i64. ; CHECK-LABEL:test_clamp_i16 define noundef i16 @test_clamp_i16(i16 noundef %a, i16 noundef %b, i16 noundef %c) { @@ -56,9 +56,39 @@ entry: ret double %0 } +; CHECK-LABEL:test_uclamp_i16 +define noundef i16 @test_uclamp_i16(i16 noundef %a, i16 noundef %b, i16 noundef %c) { +entry: +; CHECK: call i16 @dx.op.binary.i16(i32 39, i16 %{{.*}}, i16 %{{.*}}) +; CHECK: call i16 @dx.op.binary.i16(i32 40, i16 %{{.*}}, i16 %{{.*}}) + %0 = call i16 @llvm.dx.uclamp.i16(i16 %a, i16 %b, i16 %c) + ret i16 %0 +} + +; CHECK-LABEL:test_uclamp_i32 +define noundef i32 @test_uclamp_i32(i32 noundef %a, i32 noundef %b, i32 noundef %c) { +entry: +; CHECK: call i32 @dx.op.binary.i32(i32 39, i32 %{{.*}}, i32 %{{.*}}) +; CHECK: call i32 @dx.op.binary.i32(i32 40, i32 %{{.*}}, i32 %{{.*}}) + %0 = call i32 @llvm.dx.uclamp.i32(i32 %a, i32 %b, i32 %c) + ret i32 %0 +} + +; CHECK-LABEL:test_uclamp_i64 +define noundef i64 @test_uclamp_i64(i64 noundef %a, i64 noundef %b, i64 noundef %c) { +entry: +; CHECK: call i64 @dx.op.binary.i64(i32 39, i64 %a, i64 %b) +; CHECK: call i64 @dx.op.binary.i64(i32 40, i64 %{{.*}}, i64 %c) + %0 = call i64 @llvm.dx.uclamp.i64(i64 %a, i64 %b, i64 %c) + ret i64 %0 +} + declare half @llvm.dx.clamp.f16(half, half, half) declare float @llvm.dx.clamp.f32(float, float, float) declare double @llvm.dx.clamp.f64(double, double, double) declare i16 @llvm.dx.clamp.i16(i16, i16, i16) declare i32 @llvm.dx.clamp.i32(i32, i32, i32) declare i64 @llvm.dx.clamp.i64(i64, i64, i64) +declare i16 @llvm.dx.uclamp.i16(i16, i16, i16) +declare i32 @llvm.dx.uclamp.i32(i32, i32, i32) +declare i64 @llvm.dx.uclamp.i64(i64, i64, i64) >From d067025a9a522b5fc56d65cd5fd5cac95bb45c04 Mon Sep 17 00:00:00 2001 From: Farzon Lotfi <farzonlo...@microsoft.com> Date: Fri, 15 Mar 2024 20:49:44 -0400 Subject: [PATCH 3/3] address pr comments. run clang format. 
--- .../Target/DirectX/DXILIntrinsicExpansion.cpp | 41 +++++++++---------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp index 441111a7ce7ca9..bc38c10a1fceb0 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp @@ -134,44 +134,43 @@ static bool expandRcpIntrinsic(CallInst *Orig) { return true; } -static Intrinsic::ID getCorrectMaxIntrinsic(Type *elemTy, - Intrinsic::ID clampIntrinsic) { - if (clampIntrinsic == Intrinsic::dx_uclamp) +static Intrinsic::ID getMaxForClamp(Type *ElemTy, + Intrinsic::ID ClampIntrinsic) { + if (ClampIntrinsic == Intrinsic::dx_uclamp) return Intrinsic::umax; - assert(clampIntrinsic == Intrinsic::dx_clamp); - if (elemTy->isVectorTy()) - elemTy = elemTy->getScalarType(); - if (elemTy->isIntegerTy()) + assert(ClampIntrinsic == Intrinsic::dx_clamp); + if (ElemTy->isVectorTy()) + ElemTy = ElemTy->getScalarType(); + if (ElemTy->isIntegerTy()) return Intrinsic::smax; - assert(elemTy->isFloatingPointTy()); + assert(ElemTy->isFloatingPointTy()); return Intrinsic::maxnum; } -static Intrinsic::ID getCorrectMinIntrinsic(Type *elemTy, - Intrinsic::ID clampIntrinsic) { - if (clampIntrinsic == Intrinsic::dx_uclamp) +static Intrinsic::ID getMinForClamp(Type *ElemTy, + Intrinsic::ID ClampIntrinsic) { + if (ClampIntrinsic == Intrinsic::dx_uclamp) return Intrinsic::umin; - assert(clampIntrinsic == Intrinsic::dx_clamp); - if (elemTy->isVectorTy()) - elemTy = elemTy->getScalarType(); - if (elemTy->isIntegerTy()) + assert(ClampIntrinsic == Intrinsic::dx_clamp); + if (ElemTy->isVectorTy()) + ElemTy = ElemTy->getScalarType(); + if (ElemTy->isIntegerTy()) return Intrinsic::smin; - assert(elemTy->isFloatingPointTy()); + assert(ElemTy->isFloatingPointTy()); return Intrinsic::minnum; } -static bool expandClampIntrinsic(CallInst *Orig, Intrinsic::ID clampIntrinsic) { +static bool 
expandClampIntrinsic(CallInst *Orig, Intrinsic::ID ClampIntrinsic) { Value *X = Orig->getOperand(0); Value *Min = Orig->getOperand(1); Value *Max = Orig->getOperand(2); Type *Ty = X->getType(); IRBuilder<> Builder(Orig->getParent()); Builder.SetInsertPoint(Orig); - auto *MaxCall = - Builder.CreateIntrinsic(Ty, getCorrectMaxIntrinsic(Ty, clampIntrinsic), - {X, Min}, nullptr, "dx.max"); + auto *MaxCall = Builder.CreateIntrinsic( + Ty, getMaxForClamp(Ty, ClampIntrinsic), {X, Min}, nullptr, "dx.max"); auto *MinCall = - Builder.CreateIntrinsic(Ty, getCorrectMinIntrinsic(Ty, clampIntrinsic), + Builder.CreateIntrinsic(Ty, getMinForClamp(Ty, ClampIntrinsic), {MaxCall, Max}, nullptr, "dx.min"); Orig->replaceAllUsesWith(MinCall); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits