https://github.com/chaitanyav updated https://github.com/llvm/llvm-project/pull/178029
>From 0b80070955ca2d7c1231bed55432c96c8eac226d Mon Sep 17 00:00:00 2001 From: NagaChaitanya Vellanki <[email protected]> Date: Mon, 26 Jan 2026 09:38:42 -0800 Subject: [PATCH 1/4] [X86][Clang] Add constexpr support for _mm_min_ss/_mm_max_ss/_mm_min_sd/_mm_max_sd/_mm_min_sh/_mm_max_sh intrinsics - Added boolean IsScalar argument to the helper functions in InterpBuiltin/ExprConstant - Made minsh_round_mask, maxsh_round_mask constexpr only for _MM_FROUND_CUR_DIRECTION rounding mode. - Added helper function for scalar round mask in InterpBuiltin/ExprConstant Resolves:#175198 --- clang/include/clang/Basic/BuiltinsX86.td | 22 ++--- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 86 +++++++++++++++++++- clang/lib/AST/ExprConstant.cpp | 83 ++++++++++++++++++- clang/lib/Headers/avx512fp16intrin.h | 30 +++---- clang/lib/Headers/emmintrin.h | 8 +- clang/lib/Headers/xmmintrin.h | 6 +- clang/test/CodeGen/X86/avx512fp16-builtins.c | 15 ++++ clang/test/CodeGen/X86/sse-builtins.c | 4 + clang/test/CodeGen/X86/sse2-builtins.c | 4 + 9 files changed, 221 insertions(+), 37 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 23eac47eb5e4c..f419613fbed26 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -82,15 +82,6 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { } } - foreach Op = ["min", "max"] in { - let Features = "sse" in { - def Op#ss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">; - } - let Features = "sse2" in { - def Op#sd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">; - } - } - let Features = "sse" in { def cmpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">; def cmpss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">; @@ -160,6 +151,8 @@ let Features = "sse", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid def shufps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">; def minps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">; def maxps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">; + def minss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">; + def maxss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">; } let Features = "sse", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { @@ -180,6 +173,8 @@ let Features = "sse2", Attributes = [NoThrow] in { let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def cvtpd2ps : X86Builtin<"_Vector<4, float>(_Vector<2, double>)">; def cvtsd2ss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>)">; + def minsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">; + def maxsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">; } let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def cvtsd2ss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>, _Vector<4, float>, unsigned char, _Constant int)">; @@ -3403,6 +3398,13 @@ let Features = "avx512fp16", Attributes = [NoThrow, Const, Constexpr, RequiredVe def minph512 : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Constant int)">; } +let Features = "avx512fp16", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { + def maxsh : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>)">; + def minsh : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>)">; + def maxsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">; + def minsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">; +} + let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def minph256 : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>)">; def maxph256 : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>)">; @@ -3418,8 +3420,6 @@ let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<1 def divsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">; def mulsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">; def subsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">; - def maxsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">; - def minsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">; } let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index fb7c51608f85b..d864c3432fdf9 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2484,7 +2484,8 @@ static bool interp__builtin_elementwise_fp_binop( InterpState &S, CodePtr OpPC, const CallExpr *Call, llvm::function_ref<std::optional<APFloat>( const APFloat &, const APFloat &, std::optional<APSInt> RoundingMode)> - Fn) { + Fn, + bool IsScalar = false) { assert((Call->getNumArgs() == 2) || (Call->getNumArgs() == 3)); const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>(); assert(VT->getElementType()->isFloatingType()); @@ -2507,6 +2508,10 @@ static bool interp__builtin_elementwise_fp_binop( const Pointer &Dst = S.Stk.peek<Pointer>(); for (unsigned ElemIdx = 0; ElemIdx != NumElems; ++ElemIdx) { using T = PrimConv<PT_Float>::T; + if (IsScalar && ElemIdx > 0) { + Dst.elem<T>(ElemIdx) = APtr.elem<T>(ElemIdx); + continue; + } APFloat ElemA = APtr.elem<T>(ElemIdx).getAPFloat(); APFloat ElemB = BPtr.elem<T>(ElemIdx).getAPFloat(); std::optional<APFloat> Result = Fn(ElemA, ElemB, RoundingMode); @@ -2520,6 +2525,45 @@ static bool interp__builtin_elementwise_fp_binop( return true; } +static bool interp__builtin_scalar_fp_round_mask_binop( + InterpState &S, CodePtr OpPC, const CallExpr *Call, + llvm::function_ref<APFloat(const APFloat &, const APFloat &)> Fn) { + assert(Call->getNumArgs() == 5); + const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>(); + unsigned NumElems = VT->getNumElements(); + + APSInt Rounding = popToAPSInt(S, Call->getArg(4)); + APSInt MaskVal = popToAPSInt(S, Call->getArg(3)); + const Pointer &SrcPtr = S.Stk.pop<Pointer>(); + const Pointer &BPtr = S.Stk.pop<Pointer>(); + const Pointer &APtr = S.Stk.pop<Pointer>(); + const Pointer &Dst = S.Stk.peek<Pointer>(); + + // Only _MM_FROUND_CUR_DIRECTION (4) is supported. + if (Rounding != 4) + return false; + + using T = PrimConv<PT_Float>::T; + + if (MaskVal.getZExtValue() & 1) { + APFloat ElemA = APtr.elem<T>(0).getAPFloat(); + APFloat ElemB = BPtr.elem<T>(0).getAPFloat(); + if (ElemA.isNaN() || ElemA.isInfinity() || ElemA.isDenormal() || + ElemB.isNaN() || ElemB.isInfinity() || ElemB.isDenormal()) + return false; + Dst.elem<T>(0) = static_cast<T>(Fn(ElemA, ElemB)); + } else { + Dst.elem<T>(0) = SrcPtr.elem<T>(0); + } + + for (unsigned I = 1; I < NumElems; ++I) + Dst.elem<T>(I) = APtr.elem<T>(I); + + Dst.initializeAllElements(); + + return true; +} + static bool interp__builtin_elementwise_int_binop( InterpState &S, CodePtr OpPC, const CallExpr *Call, llvm::function_ref<APInt(const APSInt &, const APSInt &)> Fn) { @@ -5853,6 +5897,46 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return llvm::maximum(A, B); }); + case clang::X86::BI__builtin_ia32_minss: + case clang::X86::BI__builtin_ia32_minsd: + case clang::X86::BI__builtin_ia32_minsh: + return interp__builtin_elementwise_fp_binop( + S, OpPC, Call, + [](const APFloat &A, const APFloat &B, std::optional<APSInt>) { + if (A.isZero() && B.isZero()) + return B; + return llvm::minimum(A, B); + }, + /*IsScalar=*/true); + + case clang::X86::BI__builtin_ia32_maxss: + case clang::X86::BI__builtin_ia32_maxsd: + case clang::X86::BI__builtin_ia32_maxsh: + return interp__builtin_elementwise_fp_binop( + S, OpPC, Call, + [](const APFloat &A, const APFloat &B, std::optional<APSInt>) { + if (A.isZero() && B.isZero()) + return B; + return llvm::maximum(A, B); + }, + /*IsScalar=*/true); + + case clang::X86::BI__builtin_ia32_minsh_round_mask: + return interp__builtin_scalar_fp_round_mask_binop( + S, OpPC, Call, [](const APFloat &A, const APFloat &B) { + if (A.isZero() && B.isZero()) + return B; + return llvm::minimum(A, B); + }); + + case clang::X86::BI__builtin_ia32_maxsh_round_mask: + return interp__builtin_scalar_fp_round_mask_binop( + S, OpPC, Call, [](const APFloat &A, const APFloat &B) { + if (A.isZero() && B.isZero()) + return B; + return llvm::maximum(A, B); + }); + default: S.FFDiag(S.Current->getLocation(OpPC), diag::note_invalid_subexpr_in_const_expr) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 73768f7dd612b..96f7868d882bd 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12279,7 +12279,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { auto EvaluateFpBinOpExpr = [&](llvm::function_ref<std::optional<APFloat>( const APFloat &, const APFloat &, std::optional<APSInt>)> - Fn) { + Fn, + bool IsScalar = false) { assert(E->getNumArgs() == 2 || E->getNumArgs() == 3); APValue A, B; if (!EvaluateAsRValue(Info, E->getArg(0), A) || @@ -12302,6 +12303,10 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { ResultElements.reserve(NumElems); for (unsigned EltNum = 0; EltNum < NumElems; ++EltNum) { + if (IsScalar && EltNum > 0) { + ResultElements.push_back(A.getVectorElt(EltNum)); + continue; + } const APFloat &EltA = A.getVectorElt(EltNum).getFloat(); const APFloat &EltB = B.getVectorElt(EltNum).getFloat(); std::optional<APFloat> Result = Fn(EltA, EltB, RoundingMode); @@ -12312,6 +12317,44 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), NumElems), E); }; + auto EvaluateScalarFpRoundMaskBinOp = + [&](llvm::function_ref<APFloat(const APFloat &, const APFloat &)> Fn) { + assert(E->getNumArgs() == 5); + APValue VecA, VecB, VecSrc; + APSInt MaskVal, Rounding; + + if (!EvaluateAsRValue(Info, E->getArg(0), VecA) || + !EvaluateAsRValue(Info, E->getArg(1), VecB) || + !EvaluateAsRValue(Info, E->getArg(2), VecSrc) || + !EvaluateInteger(E->getArg(3), MaskVal, Info) || + !EvaluateInteger(E->getArg(4), Rounding, Info)) + return false; + + // Only _MM_FROUND_CUR_DIRECTION (4) is supported. + if (Rounding != 4) + return false; + + unsigned NumElems = VecA.getVectorLength(); + SmallVector<APValue, 8> ResultElements; + ResultElements.reserve(NumElems); + + if (MaskVal.getZExtValue() & 1) { + const APFloat &EltA = VecA.getVectorElt(0).getFloat(); + const APFloat &EltB = VecB.getVectorElt(0).getFloat(); + if (EltA.isNaN() || EltA.isInfinity() || EltA.isDenormal() || + EltB.isNaN() || EltB.isInfinity() || EltB.isDenormal()) + return false; + ResultElements.push_back(APValue(Fn(EltA, EltB))); + } else { + ResultElements.push_back(VecSrc.getVectorElt(0)); + } + + for (unsigned I = 1; I < NumElems; ++I) + ResultElements.push_back(VecA.getVectorElt(I)); + + return Success(APValue(ResultElements.data(), NumElems), E); + }; + auto EvalSelectScalar = [&](unsigned Len) -> bool { APSInt Mask; APValue AVal, WVal; @@ -14415,6 +14458,44 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return llvm::maximum(A, B); }); + case clang::X86::BI__builtin_ia32_minss: + case clang::X86::BI__builtin_ia32_minsd: + case clang::X86::BI__builtin_ia32_minsh: + return EvaluateFpBinOpExpr( + [](const APFloat &A, const APFloat &B, std::optional<APSInt>) { + if (A.isZero() && B.isZero()) + return B; + return llvm::minimum(A, B); + }, + /*IsScalar=*/true); + + case clang::X86::BI__builtin_ia32_maxss: + case clang::X86::BI__builtin_ia32_maxsd: + case clang::X86::BI__builtin_ia32_maxsh: + return EvaluateFpBinOpExpr( + [](const APFloat &A, const APFloat &B, std::optional<APSInt>) { + if (A.isZero() && B.isZero()) + return B; + return llvm::maximum(A, B); + }, + /*IsScalar=*/true); + + case clang::X86::BI__builtin_ia32_minsh_round_mask: + return EvaluateScalarFpRoundMaskBinOp( + [](const APFloat &A, const APFloat &B) { + if (A.isZero() && B.isZero()) + return B; + return llvm::minimum(A, B); + }); + + case clang::X86::BI__builtin_ia32_maxsh_round_mask: + return EvaluateScalarFpRoundMaskBinOp( + [](const APFloat &A, const APFloat &B) { + if (A.isZero() && B.isZero()) + return B; + return llvm::maximum(A, B); + }); + case clang::X86::BI__builtin_ia32_vcvtps2ph: case clang::X86::BI__builtin_ia32_vcvtps2ph256: { APValue SrcVec; diff --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h index 9a1d1930f66b6..4268104c3b619 100644 --- a/clang/lib/Headers/avx512fp16intrin.h +++ b/clang/lib/Headers/avx512fp16intrin.h @@ -720,25 +720,22 @@ _mm_maskz_div_sh(__mmask8 __U, __m128h __A, __m128h __B) { (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ (__mmask8)(U), (int)(R))) -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_min_sh(__m128h __A, - __m128h __B) { +static __inline__ __m128h + __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_min_sh(__m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_minsh_round_mask( (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_min_sh(__m128h __W, - __mmask8 __U, - __m128h __A, - __m128h __B) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_min_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_minsh_round_mask((__v8hf)__A, (__v8hf)__B, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_min_sh(__mmask8 __U, - __m128h __A, - __m128h __B) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_min_sh(__mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_minsh_round_mask( (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); @@ -759,25 +756,22 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_min_sh(__mmask8 __U, (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \ (__mmask8)(U), (int)(R))) -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_max_sh(__m128h __A, - __m128h __B) { +static __inline__ __m128h + __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_max_sh(__m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_maxsh_round_mask( (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_max_sh(__m128h __W, - __mmask8 __U, - __m128h __A, - __m128h __B) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_max_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_maxsh_round_mask((__v8hf)__A, (__v8hf)__B, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_max_sh(__mmask8 __U, - __m128h __A, - __m128h __B) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_max_sh(__mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_maxsh_round_mask( (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 61b35e97314fd..bbf366133c68a 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -279,8 +279,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_pd(__m128d __a) { /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the /// minimum value between both operands. The upper 64 bits are copied from /// the upper 64 bits of the first source operand. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_sd(__m128d __a, - __m128d __b) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_min_sd(__m128d __a, + __m128d __b) { return __builtin_ia32_minsd((__v2df)__a, (__v2df)__b); } @@ -325,8 +325,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_min_pd(__m128d __a, /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the /// maximum value between both operands. The upper 64 bits are copied from /// the upper 64 bits of the first source operand. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_sd(__m128d __a, - __m128d __b) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_max_sd(__m128d __a, + __m128d __b) { return __builtin_ia32_maxsd((__v2df)__a, (__v2df)__b); } diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index ab0f0c1690759..efc0e6ce47e7d 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -341,7 +341,8 @@ _mm_rsqrt_ps(__m128 __a) /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the /// minimum value between both operands. The upper 96 bits are copied from /// the upper 96 bits of the first source operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_min_ss(__m128 __a, __m128 __b) { +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_min_ss(__m128 __a, + __m128 __b) { return __builtin_ia32_minss((__v4sf)__a, (__v4sf)__b); } @@ -384,7 +385,8 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_min_ps(__m128 __a, /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the /// maximum value between both operands. The upper 96 bits are copied from /// the upper 96 bits of the first source operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_max_ss(__m128 __a, __m128 __b) { +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_max_ss(__m128 __a, + __m128 __b) { return __builtin_ia32_maxss((__v4sf)__a, (__v4sf)__b); } diff --git a/clang/test/CodeGen/X86/avx512fp16-builtins.c b/clang/test/CodeGen/X86/avx512fp16-builtins.c index dbd24d0899c60..840ada8f30bcd 100644 --- a/clang/test/CodeGen/X86/avx512fp16-builtins.c +++ b/clang/test/CodeGen/X86/avx512fp16-builtins.c @@ -1037,17 +1037,24 @@ __m128h test_mm_mask_min_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) // CHECK: @llvm.x86.avx512fp16.mask.min.sh.round return _mm_mask_min_sh(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_m128h(_mm_mask_min_sh((__m128h)(__v8hf){1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f},(__mmask8)0x01,(__m128h)(__v8hf){10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f},(__m128h)(__v8hf){100.0f,200.0f,300.0f,400.0f,500.0f,600.0f,700.0f,800.0f}),10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f)); +TEST_CONSTEXPR(match_m128h(_mm_mask_min_sh((__m128h)(__v8hf){1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f},(__mmask8)0x00,(__m128h)(__v8hf){10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f},(__m128h)(__v8hf){100.0f,200.0f,300.0f,400.0f,500.0f,600.0f,700.0f,800.0f}),1.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f)); + __m128h test_mm_maskz_min_sh(__mmask8 __U, __m128h __A, __m128h __B) { // CHECK-LABEL: test_mm_maskz_min_sh // CHECK: @llvm.x86.avx512fp16.mask.min.sh.round return _mm_maskz_min_sh(__U, __A, __B); } +TEST_CONSTEXPR(match_m128h(_mm_maskz_min_sh((__mmask8)0x01,(__m128h)(__v8hf){10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f},(__m128h)(__v8hf){100.0f,200.0f,300.0f,400.0f,500.0f,600.0f,700.0f,800.0f}),10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f)); +TEST_CONSTEXPR(match_m128h(_mm_maskz_min_sh((__mmask8)0x00,(__m128h)(__v8hf){10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f},(__m128h)(__v8hf){100.0f,200.0f,300.0f,400.0f,500.0f,600.0f,700.0f,800.0f}),0.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f)); __m128h test_mm_min_sh(__m128h __A, __m128h __B) { // CHECK-LABEL: test_mm_min_sh // CHECK: @llvm.x86.avx512fp16.mask.min.sh.round return _mm_min_sh(__A, __B); } +TEST_CONSTEXPR(match_m128h(_mm_min_sh((__m128h)(__v8hf){10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f},(__m128h)(__v8hf){100.0f,200.0f,300.0f,400.0f,500.0f,600.0f,700.0f,800.0f}),10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f)); +TEST_CONSTEXPR(match_m128h(_mm_min_sh((__m128h)(__v8hf){+0.0f,+0.0f,+0.0f,+0.0f,+0.0f,+0.0f,+0.0f,+0.0f},(__m128h)(__v8hf){-0.0f,-0.0f,-0.0f,-0.0f,-0.0f,-0.0f,-0.0f,-0.0f}),-0.0f,+0.0f,+0.0f,+0.0f,+0.0f,+0.0f,+0.0f,+0.0f)); __m128h test_mm_max_round_sh(__m128h __A, __m128h __B) { // CHECK-LABEL: test_mm_max_round_sh @@ -1069,17 +1076,25 @@ __m128h test_mm_mask_max_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) // CHECK: @llvm.x86.avx512fp16.mask.max.sh.round return _mm_mask_max_sh(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_m128h(_mm_mask_max_sh((__m128h)(__v8hf){1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f},(__mmask8)0x01,(__m128h)(__v8hf){10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f},(__m128h)(__v8hf){100.0f,200.0f,300.0f,400.0f,500.0f,600.0f,700.0f,800.0f}),100.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f)); +TEST_CONSTEXPR(match_m128h(_mm_mask_max_sh((__m128h)(__v8hf){1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f},(__mmask8)0x00,(__m128h)(__v8hf){10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f},(__m128h)(__v8hf){100.0f,200.0f,300.0f,400.0f,500.0f,600.0f,700.0f,800.0f}),1.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f)); + __m128h test_mm_maskz_max_sh(__mmask8 __U, __m128h __A, __m128h __B) { // CHECK-LABEL: test_mm_maskz_max_sh // CHECK: @llvm.x86.avx512fp16.mask.max.sh.round return _mm_maskz_max_sh(__U, __A, __B); } +TEST_CONSTEXPR(match_m128h(_mm_maskz_max_sh((__mmask8)0x01,(__m128h)(__v8hf){10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f},(__m128h)(__v8hf){100.0f,200.0f,300.0f,400.0f,500.0f,600.0f,700.0f,800.0f}),100.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f)); +TEST_CONSTEXPR(match_m128h(_mm_maskz_max_sh((__mmask8)0x00,(__m128h)(__v8hf){10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f},(__m128h)(__v8hf){100.0f,200.0f,300.0f,400.0f,500.0f,600.0f,700.0f,800.0f}),0.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f)); __m128h test_mm_max_sh(__m128h __A, __m128h __B) { // CHECK-LABEL: test_mm_max_sh // CHECK: @llvm.x86.avx512fp16.mask.max.sh.round return _mm_max_sh(__A, __B); } +TEST_CONSTEXPR(match_m128h(_mm_max_sh((__m128h)(__v8hf){10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f},(__m128h)(__v8hf){100.0f,200.0f,300.0f,400.0f,500.0f,600.0f,700.0f,800.0f}),100.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f)); +TEST_CONSTEXPR(match_m128h(_mm_max_sh((__m128h)(__v8hf){+0.0f,+0.0f,+0.0f,+0.0f,+0.0f,+0.0f,+0.0f,+0.0f},(__m128h)(__v8hf){-0.0f,-0.0f,-0.0f,-0.0f,-0.0f,-0.0f,-0.0f,-0.0f}),-0.0f,+0.0f,+0.0f,+0.0f,+0.0f,+0.0f,+0.0f,+0.0f)); + __mmask32 test_mm512_cmp_round_ph_mask(__m512h a, __m512h b) { // CHECK-LABEL: test_mm512_cmp_round_ph_mask // CHECK: fcmp oeq <32 x half> %{{.*}}, %{{.*}} diff --git a/clang/test/CodeGen/X86/sse-builtins.c b/clang/test/CodeGen/X86/sse-builtins.c index edd9f00bae2b2..87b9a99a0e058 100644 --- a/clang/test/CodeGen/X86/sse-builtins.c +++ b/clang/test/CodeGen/X86/sse-builtins.c @@ -524,6 +524,8 @@ __m128 test_mm_max_ss(__m128 A, __m128 B) { // CHECK: @llvm.x86.sse.max.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}) return _mm_max_ss(A, B); } +TEST_CONSTEXPR(match_m128(_mm_max_ss((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+4.0f, +3.0f, +2.0f, +1.0f}), +4.0f, +2.0f, +3.0f, +4.0f)); +TEST_CONSTEXPR(match_m128(_mm_max_ss((__m128){+0.0f, -0.0f, +0.0f, -0.0f}, (__m128){-0.0f, +0.0f, -0.0f, +0.0f}), -0.0f, -0.0f, +0.0f, -0.0f)); __m128 test_mm_min_ps(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_min_ps @@ -539,6 +541,8 @@ __m128 test_mm_min_ss(__m128 A, __m128 B) { // CHECK: @llvm.x86.sse.min.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}) return _mm_min_ss(A, B); } +TEST_CONSTEXPR(match_m128(_mm_min_ss((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+4.0f, +3.0f, +2.0f, +1.0f}), +1.0f, +2.0f, +3.0f, +4.0f)); +TEST_CONSTEXPR(match_m128(_mm_min_ss((__m128){+0.0f, -0.0f, +0.0f, -0.0f}, (__m128){-0.0f, +0.0f, -0.0f, +0.0f}), -0.0f, -0.0f, +0.0f, -0.0f)); __m128 test_mm_move_ss(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_move_ss diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index ab0a857b926f3..2993b8bb719d6 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -894,6 +894,8 @@ __m128d test_mm_max_sd(__m128d A, __m128d B) { // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.max.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_max_sd(A, B); } +TEST_CONSTEXPR(match_m128d(_mm_max_sd((__m128d){+1.0, +2.0}, (__m128d){+4.0, +1.0}), +4.0, +2.0)); +TEST_CONSTEXPR(match_m128d(_mm_max_sd((__m128d){+0.0, -0.0}, (__m128d){-0.0, +0.0}), -0.0, -0.0)); void test_mm_mfence(void) { // CHECK-LABEL: test_mm_mfence @@ -931,6 +933,8 @@ __m128d test_mm_min_sd(__m128d A, __m128d B) { // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.min.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_min_sd(A, B); } +TEST_CONSTEXPR(match_m128d(_mm_min_sd((__m128d){+1.0, +2.0}, (__m128d){+4.0, +1.0}), +1.0, +2.0)); +TEST_CONSTEXPR(match_m128d(_mm_min_sd((__m128d){+0.0, -0.0}, (__m128d){-0.0, +0.0}), -0.0, -0.0)); __m64 test_mm_movepi64_pi64(__m128i A) { // CHECK-LABEL: test_mm_movepi64_pi64 >From f90295703c752cf7c6eddf69a01ce3e68412d357 Mon Sep 17 00:00:00 2001 From: NagaChaitanya Vellanki <[email protected]> Date: Tue, 27 Jan 2026 08:51:38 -0800 Subject: [PATCH 2/4] Group min/max cases together for readability --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 40 ++++++++++++------------ clang/lib/AST/ExprConstant.cpp | 38 +++++++++++----------- 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index d864c3432fdf9..641901f5f1f30 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -5876,6 +5876,26 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return llvm::minimum(A, B); }); + case clang::X86::BI__builtin_ia32_minss: + case clang::X86::BI__builtin_ia32_minsd: + case clang::X86::BI__builtin_ia32_minsh: + return interp__builtin_elementwise_fp_binop( + S, OpPC, Call, + [](const APFloat &A, const APFloat &B, std::optional<APSInt>) { + if (A.isZero() && B.isZero()) + return B; + return llvm::minimum(A, B); + }, + /*IsScalar=*/true); + + case clang::X86::BI__builtin_ia32_minsh_round_mask: + return interp__builtin_scalar_fp_round_mask_binop( + S, OpPC, Call, [](const APFloat &A, const APFloat &B) { + if (A.isZero() && B.isZero()) + return B; + return llvm::minimum(A, B); + }); + case clang::X86::BI__builtin_ia32_maxps: case clang::X86::BI__builtin_ia32_maxpd: case clang::X86::BI__builtin_ia32_maxph128: @@ -5897,18 +5917,6 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return llvm::maximum(A, B); }); - case clang::X86::BI__builtin_ia32_minss: - case clang::X86::BI__builtin_ia32_minsd: - case clang::X86::BI__builtin_ia32_minsh: - return interp__builtin_elementwise_fp_binop( - S, OpPC, Call, - [](const APFloat &A, const APFloat &B, std::optional<APSInt>) { - if (A.isZero() && B.isZero()) - return B; - return llvm::minimum(A, B); - }, - /*IsScalar=*/true); - case clang::X86::BI__builtin_ia32_maxss: case clang::X86::BI__builtin_ia32_maxsd: case clang::X86::BI__builtin_ia32_maxsh: @@ -5921,14 +5929,6 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, }, /*IsScalar=*/true); - case clang::X86::BI__builtin_ia32_minsh_round_mask: - return interp__builtin_scalar_fp_round_mask_binop( - S, OpPC, Call, [](const APFloat &A, const APFloat &B) { - if (A.isZero() && B.isZero()) - return B; - return llvm::minimum(A, B); - }); - case clang::X86::BI__builtin_ia32_maxsh_round_mask: return interp__builtin_scalar_fp_round_mask_binop( S, OpPC, Call, [](const APFloat &A, const APFloat &B) { diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 96f7868d882bd..38f203c329ff6 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -14438,6 +14438,25 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return llvm::minimum(A, B); }); + case clang::X86::BI__builtin_ia32_minss: + case clang::X86::BI__builtin_ia32_minsd: + case clang::X86::BI__builtin_ia32_minsh: + return EvaluateFpBinOpExpr( + [](const APFloat &A, const APFloat &B, std::optional<APSInt>) { + if (A.isZero() && B.isZero()) + return B; + return llvm::minimum(A, B); + }, + /*IsScalar=*/true); + + case clang::X86::BI__builtin_ia32_minsh_round_mask: + return EvaluateScalarFpRoundMaskBinOp( + [](const APFloat &A, const APFloat &B) { + if (A.isZero() && B.isZero()) + return B; + return llvm::minimum(A, B); + }); + case clang::X86::BI__builtin_ia32_maxps: case clang::X86::BI__builtin_ia32_maxpd: case clang::X86::BI__builtin_ia32_maxps256: @@ -14458,17 +14477,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return llvm::maximum(A, B); }); - case clang::X86::BI__builtin_ia32_minss: - case clang::X86::BI__builtin_ia32_minsd: - case clang::X86::BI__builtin_ia32_minsh: - return EvaluateFpBinOpExpr( - [](const APFloat &A, const APFloat &B, std::optional<APSInt>) { - if (A.isZero() && B.isZero()) - return B; - return llvm::minimum(A, B); - }, - /*IsScalar=*/true); - case clang::X86::BI__builtin_ia32_maxss: case clang::X86::BI__builtin_ia32_maxsd: case clang::X86::BI__builtin_ia32_maxsh: @@ -14480,14 +14488,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { }, /*IsScalar=*/true); - case clang::X86::BI__builtin_ia32_minsh_round_mask: - return EvaluateScalarFpRoundMaskBinOp( - [](const APFloat &A, const APFloat &B) { - if (A.isZero() && B.isZero()) - return B; - return llvm::minimum(A, B); - }); - case clang::X86::BI__builtin_ia32_maxsh_round_mask: return EvaluateScalarFpRoundMaskBinOp( [](const APFloat &A, const APFloat &B) { >From 5469e8526860b2b1b4ffeed59c72c20c44656f76 Mon Sep 17 00:00:00 2001 From: NagaChaitanya Vellanki <[email protected]> Date: Wed, 28 Jan 2026 08:55:32 -0800 Subject: [PATCH 3/4] Address code review comments - Move RoundingMode into the callback --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 51 +++++++++++++++++------- clang/lib/AST/ExprConstant.cpp | 42 +++++++++++++------ 2 files changed, 66 insertions(+), 27 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 641901f5f1f30..4e2821529876f 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2527,31 +2527,30 @@ static bool interp__builtin_elementwise_fp_binop( static bool interp__builtin_scalar_fp_round_mask_binop( InterpState &S, CodePtr OpPC, const CallExpr *Call, - llvm::function_ref<APFloat(const APFloat &, const APFloat &)> Fn) { + llvm::function_ref<std::optional<APFloat>(const APFloat &, const APFloat &, + std::optional<APSInt>)> + Fn) { assert(Call->getNumArgs() == 5); const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>(); unsigned NumElems = VT->getNumElements(); - APSInt Rounding = popToAPSInt(S, Call->getArg(4)); - APSInt MaskVal = popToAPSInt(S, Call->getArg(3)); + uint64_t Rounding = popToUInt64(S, Call->getArg(4)); + uint64_t MaskVal = popToUInt64(S, Call->getArg(3)); const Pointer &SrcPtr = S.Stk.pop<Pointer>(); const Pointer &BPtr = S.Stk.pop<Pointer>(); const Pointer &APtr = S.Stk.pop<Pointer>(); const Pointer &Dst = S.Stk.peek<Pointer>(); - // Only _MM_FROUND_CUR_DIRECTION (4) is supported. - if (Rounding != 4) - return false; - using T = PrimConv<PT_Float>::T; - if (MaskVal.getZExtValue() & 1) { + if (MaskVal & 1) { APFloat ElemA = APtr.elem<T>(0).getAPFloat(); APFloat ElemB = BPtr.elem<T>(0).getAPFloat(); - if (ElemA.isNaN() || ElemA.isInfinity() || ElemA.isDenormal() || - ElemB.isNaN() || ElemB.isInfinity() || ElemB.isDenormal()) + APSInt RoundingMode(APInt(32, Rounding), /*isUnsigned=*/true); + std::optional<APFloat> Result = Fn(ElemA, ElemB, RoundingMode); + if (!Result) return false; - Dst.elem<T>(0) = static_cast<T>(Fn(ElemA, ElemB)); + Dst.elem<T>(0) = static_cast<T>(*Result); } else { Dst.elem<T>(0) = SrcPtr.elem<T>(0); } @@ -5881,7 +5880,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case clang::X86::BI__builtin_ia32_minsh: return interp__builtin_elementwise_fp_binop( S, OpPC, Call, - [](const APFloat &A, const APFloat &B, std::optional<APSInt>) { + [](const APFloat &A, const APFloat &B, + std::optional<APSInt>) -> std::optional<APFloat> { + if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() || + B.isInfinity() || B.isDenormal()) + return std::nullopt; if (A.isZero() && B.isZero()) return B; return llvm::minimum(A, B); @@ -5890,7 +5893,14 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case clang::X86::BI__builtin_ia32_minsh_round_mask: return interp__builtin_scalar_fp_round_mask_binop( - S, OpPC, Call, [](const APFloat &A, const APFloat &B) { + S, OpPC, Call, + [](const APFloat &A, const APFloat &B, + std::optional<APSInt> RoundingMode) -> std::optional<APFloat> { + if (!RoundingMode || *RoundingMode != 4) + return std::nullopt; + if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() || + B.isInfinity() || B.isDenormal()) + return std::nullopt; if (A.isZero() && B.isZero()) return B; return llvm::minimum(A, B); @@ -5922,7 +5932,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case clang::X86::BI__builtin_ia32_maxsh: return interp__builtin_elementwise_fp_binop( S, OpPC, Call, - [](const APFloat &A, const APFloat &B, std::optional<APSInt>) { + [](const APFloat &A, const APFloat &B, + std::optional<APSInt>) -> std::optional<APFloat> { + if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() || + B.isInfinity() || B.isDenormal()) + return std::nullopt; if (A.isZero() && B.isZero()) return B; return llvm::maximum(A, B); @@ -5931,7 +5945,14 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case clang::X86::BI__builtin_ia32_maxsh_round_mask: return interp__builtin_scalar_fp_round_mask_binop( - S, OpPC, Call, [](const APFloat &A, const APFloat &B) { + S, OpPC, Call, + [](const APFloat &A, const APFloat &B, + std::optional<APSInt> RoundingMode) -> std::optional<APFloat> { + if (!RoundingMode || *RoundingMode != 4) + return std::nullopt; + if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() || + B.isInfinity() || B.isDenormal()) + return std::nullopt; if (A.isZero() && B.isZero()) return B; return llvm::maximum(A, B); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 38f203c329ff6..a404d6d764e78 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12318,7 +12318,9 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { }; auto EvaluateScalarFpRoundMaskBinOp = - [&](llvm::function_ref<APFloat(const APFloat &, const APFloat &)> Fn) { + [&](llvm::function_ref<std::optional<APFloat>( + const APFloat &, const APFloat &, std::optional<APSInt>)> + Fn) { assert(E->getNumArgs() == 5); APValue VecA, VecB, VecSrc; APSInt MaskVal, Rounding; @@ -12330,10 +12332,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { !EvaluateInteger(E->getArg(4), Rounding, Info)) return false; - // Only _MM_FROUND_CUR_DIRECTION (4) is supported. - if (Rounding != 4) - return false; - unsigned NumElems = VecA.getVectorLength(); SmallVector<APValue, 8> ResultElements; ResultElements.reserve(NumElems); @@ -12341,10 +12339,10 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { if (MaskVal.getZExtValue() & 1) { const APFloat &EltA = VecA.getVectorElt(0).getFloat(); const APFloat &EltB = VecB.getVectorElt(0).getFloat(); - if (EltA.isNaN() || EltA.isInfinity() || EltA.isDenormal() || - EltB.isNaN() || EltB.isInfinity() || EltB.isDenormal()) + std::optional<APFloat> Result = Fn(EltA, EltB, Rounding); + if (!Result) return false; - ResultElements.push_back(APValue(Fn(EltA, EltB))); + ResultElements.push_back(APValue(*Result)); } else { ResultElements.push_back(VecSrc.getVectorElt(0)); } @@ -14442,7 +14440,11 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case clang::X86::BI__builtin_ia32_minsd: case clang::X86::BI__builtin_ia32_minsh: return EvaluateFpBinOpExpr( - [](const APFloat &A, const APFloat &B, std::optional<APSInt>) { + [](const APFloat &A, const APFloat &B, + std::optional<APSInt>) -> std::optional<APFloat> { + if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() || + B.isInfinity() || B.isDenormal()) + return std::nullopt; if (A.isZero() && B.isZero()) return B; return llvm::minimum(A, B); @@ -14451,7 +14453,13 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case clang::X86::BI__builtin_ia32_minsh_round_mask: return EvaluateScalarFpRoundMaskBinOp( - [](const APFloat &A, const APFloat &B) { + [](const APFloat &A, const APFloat &B, + std::optional<APSInt> RoundingMode) -> std::optional<APFloat> { + if (!RoundingMode || *RoundingMode != 4) + return std::nullopt; + if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() || + B.isInfinity() || B.isDenormal()) + return std::nullopt; if (A.isZero() && B.isZero()) return B; return llvm::minimum(A, B); @@ -14481,7 +14489,11 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case clang::X86::BI__builtin_ia32_maxsd: case clang::X86::BI__builtin_ia32_maxsh: return EvaluateFpBinOpExpr( - [](const APFloat &A, const APFloat &B, std::optional<APSInt>) { + [](const APFloat &A, const APFloat &B, + std::optional<APSInt>) -> std::optional<APFloat> { + if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() || + B.isInfinity() || B.isDenormal()) + return std::nullopt; if (A.isZero() && B.isZero()) return B; return llvm::maximum(A, B); @@ -14490,7 +14502,13 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case clang::X86::BI__builtin_ia32_maxsh_round_mask: return EvaluateScalarFpRoundMaskBinOp( - [](const APFloat &A, const APFloat &B) { + [](const APFloat &A, const APFloat &B, + std::optional<APSInt> RoundingMode) -> std::optional<APFloat> { + if (!RoundingMode || *RoundingMode != 4) + return std::nullopt; + if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() || + B.isInfinity() || B.isDenormal()) + return std::nullopt; if (A.isZero() && B.isZero()) return B; return llvm::maximum(A, B); >From aa46d4126cbac61045eb5f24fc9e46447393877f Mon Sep 17 00:00:00 2001 From: NagaChaitanya Vellanki <[email protected]> Date: Wed, 28 Jan 2026 09:48:02 -0800 Subject: [PATCH 4/4] Add tests for minsh/maxsh_round_mask - Validate the rounding modes - Special values NaN, Denormal, Infinity --- .../constexpr-x86-avx512fp16-builtins.cpp | 113 ++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 clang/test/SemaCXX/constexpr-x86-avx512fp16-builtins.cpp diff --git a/clang/test/SemaCXX/constexpr-x86-avx512fp16-builtins.cpp b/clang/test/SemaCXX/constexpr-x86-avx512fp16-builtins.cpp new file mode 100644 index 0000000000000..c815f2ad063b4 --- /dev/null +++ b/clang/test/SemaCXX/constexpr-x86-avx512fp16-builtins.cpp @@ -0,0 +1,113 @@ +// RUN: %clang_cc1 -std=c++20 -ffreestanding -fexperimental-new-constant-interpreter -triple x86_64-unknown-unknown -target-feature +avx512fp16 -verify %s + +#include <immintrin.h> +#include "../CodeGen/X86/builtin_test_helpers.h" + +constexpr int ROUND_CUR_DIRECTION = 4; +constexpr int ROUND_NO_EXC = 8; +constexpr int ROUND_CUR_DIRECTION_NO_EXC = 12; + +namespace Test_mm_min_sh_round_mask_invalid_rounding { +constexpr __m128h a = (__m128h)(__v8hf){10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f}; +constexpr __m128h b = (__m128h)(__v8hf){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f}; +constexpr __m128h src = (__m128h)(__v8hf){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}; +constexpr __m128h result = (__m128h)__builtin_ia32_minsh_round_mask((__v8hf)a, (__v8hf)b, (__v8hf)src, 1, ROUND_NO_EXC); // expected-error {{must be initialized by a constant expression}} +} + +namespace Test_mm_max_sh_round_mask_invalid_rounding_8 { +constexpr __m128h a = (__m128h)(__v8hf){10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f}; +constexpr __m128h b = (__m128h)(__v8hf){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f}; +constexpr __m128h src = (__m128h)(__v8hf){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}; +constexpr __m128h result = (__m128h)__builtin_ia32_maxsh_round_mask((__v8hf)a, (__v8hf)b, (__v8hf)src, 1, ROUND_NO_EXC); // expected-error {{must be initialized by a constant expression}} +} + +namespace Test_mm_max_sh_round_mask_invalid_rounding_12 { +constexpr __m128h a = (__m128h)(__v8hf){10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f}; +constexpr __m128h b = (__m128h)(__v8hf){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f}; +constexpr __m128h src = (__m128h)(__v8hf){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}; +constexpr __m128h result = (__m128h)__builtin_ia32_maxsh_round_mask((__v8hf)a, (__v8hf)b, (__v8hf)src, 1, ROUND_CUR_DIRECTION_NO_EXC); // expected-error {{must be initialized by a constant expression}} +} + +namespace Test_mm_min_sh_round_mask_valid_rounding { +constexpr __m128h a = (__m128h)(__v8hf){10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f}; +constexpr __m128h b = (__m128h)(__v8hf){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f}; +constexpr __m128h src = (__m128h)(__v8hf){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}; +constexpr __m128h result = (__m128h)__builtin_ia32_minsh_round_mask((__v8hf)a, (__v8hf)b, (__v8hf)src, 1, ROUND_CUR_DIRECTION); +static_assert(match_m128h(result, 10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f)); +} + +namespace Test_mm_max_sh_round_mask_valid_rounding { +constexpr __m128h a = (__m128h)(__v8hf){10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f}; +constexpr __m128h b = (__m128h)(__v8hf){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f}; +constexpr __m128h src = (__m128h)(__v8hf){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}; +constexpr __m128h result = (__m128h)__builtin_ia32_maxsh_round_mask((__v8hf)a, (__v8hf)b, (__v8hf)src, 1, ROUND_CUR_DIRECTION); +static_assert(match_m128h(result, 100.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f)); +} + +namespace Test_mm_min_sh_round_mask_nan { +constexpr __m128h a = (__m128h)(__v8hf){__builtin_nanf16(""), 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f}; +constexpr __m128h b = (__m128h)(__v8hf){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f}; +constexpr __m128h src = (__m128h)(__v8hf){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}; +constexpr __m128h result = (__m128h)__builtin_ia32_minsh_round_mask((__v8hf)a, (__v8hf)b, (__v8hf)src, 1, ROUND_CUR_DIRECTION); // expected-error {{must be initialized by a constant expression}} +} + +namespace Test_mm_min_sh_round_mask_pos_infinity { +constexpr __m128h a = (__m128h)(__v8hf){__builtin_inff16(), 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f}; +constexpr __m128h b = (__m128h)(__v8hf){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f}; +constexpr __m128h src = (__m128h)(__v8hf){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}; +constexpr __m128h result = (__m128h)__builtin_ia32_minsh_round_mask((__v8hf)a, (__v8hf)b, (__v8hf)src, 1, ROUND_CUR_DIRECTION); // expected-error {{must be initialized by a constant expression}} +} + +namespace Test_mm_min_sh_round_mask_neg_infinity { +constexpr __m128h a = (__m128h)(__v8hf){-__builtin_inff16(), 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f}; +constexpr __m128h b = (__m128h)(__v8hf){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f}; +constexpr __m128h src = (__m128h)(__v8hf){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}; +constexpr __m128h result = (__m128h)__builtin_ia32_minsh_round_mask((__v8hf)a, (__v8hf)b, (__v8hf)src, 1, ROUND_CUR_DIRECTION); // expected-error {{must be initialized by a constant expression}} +} + +namespace Test_mm_min_sh_round_mask_denormal { +constexpr _Float16 denormal = 0x1.0p-15f16; +constexpr __m128h a = (__m128h)(__v8hf){denormal, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f}; +constexpr __m128h b = (__m128h)(__v8hf){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f}; +constexpr __m128h src = (__m128h)(__v8hf){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}; +constexpr __m128h result = (__m128h)__builtin_ia32_minsh_round_mask((__v8hf)a, (__v8hf)b, (__v8hf)src, 1, ROUND_CUR_DIRECTION); // expected-error {{must be initialized by a constant expression}} +} + +namespace Test_mm_min_sh_valid { +constexpr __m128h a = (__m128h)(__v8hf){10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f}; +constexpr __m128h b = (__m128h)(__v8hf){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f}; +constexpr __m128h result = _mm_min_sh(a, b); +static_assert(match_m128h(result, 10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f)); +} + +namespace Test_mm_max_sh_valid { +constexpr __m128h a = (__m128h)(__v8hf){10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f}; +constexpr __m128h b = (__m128h)(__v8hf){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f}; +constexpr __m128h result = _mm_max_sh(a, b); +static_assert(match_m128h(result, 100.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f)); +} + +namespace Test_mm_min_sh_nan { +constexpr __m128h a = (__m128h)(__v8hf){__builtin_nanf16(""), 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f}; +constexpr __m128h b = (__m128h)(__v8hf){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f}; +constexpr __m128h result = _mm_min_sh(a, b); // expected-error {{must be initialized by a constant expression}} +} + +namespace Test_mm_min_sh_pos_infinity { +constexpr __m128h a = (__m128h)(__v8hf){__builtin_inff16(), 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f}; +constexpr __m128h b = (__m128h)(__v8hf){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f}; +constexpr __m128h result = _mm_min_sh(a, b); // expected-error {{must be initialized by a constant expression}} +} + +namespace Test_mm_min_sh_neg_infinity { +constexpr __m128h a = (__m128h)(__v8hf){-__builtin_inff16(), 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f}; +constexpr __m128h b = (__m128h)(__v8hf){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f}; +constexpr __m128h result = _mm_min_sh(a, b); // expected-error {{must be initialized by a constant expression}} +} + +namespace Test_mm_min_sh_denormal { +constexpr _Float16 denormal = 0x1.0p-15f16; +constexpr __m128h a = (__m128h)(__v8hf){denormal, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f}; +constexpr __m128h b = (__m128h)(__v8hf){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f}; +constexpr __m128h result = _mm_min_sh(a, b); // expected-error {{must be initialized by a constant expression}} +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
