Author: NagaChaitanya Vellanki Date: 2026-01-12T14:50:54Z New Revision: 52d6170c9f2d64ae78ae301d454e216aa865f248
URL: https://github.com/llvm/llvm-project/commit/52d6170c9f2d64ae78ae301d454e216aa865f248 DIFF: https://github.com/llvm/llvm-project/commit/52d6170c9f2d64ae78ae301d454e216aa865f248.diff LOG: [X86][Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - Allow SSE/AVX FP MAX/MIN intrinsics to be used in constexpr (#171966) * Implemented a generic function interp__builtin_elementwise_fp_binop * NaN, Infinity, Denormal cases can be integrated into the lambda in the future. For now, these cases are hardcoded in the generic function Resolves: #169991 Added: Modified: clang/include/clang/Basic/BuiltinsX86.td clang/lib/AST/ByteCode/InterpBuiltin.cpp clang/lib/AST/ExprConstant.cpp clang/lib/Headers/avx512fintrin.h clang/lib/Headers/avx512fp16intrin.h clang/lib/Headers/avx512vlfp16intrin.h clang/lib/Headers/avx512vlintrin.h clang/lib/Headers/avxintrin.h clang/lib/Headers/emmintrin.h clang/lib/Headers/xmmintrin.h clang/test/CodeGen/X86/avx-builtins.c clang/test/CodeGen/X86/avx512f-builtins.c clang/test/CodeGen/X86/avx512fp16-builtins.c clang/test/CodeGen/X86/avx512vl-builtins.c clang/test/CodeGen/X86/avx512vlfp16-builtins.c clang/test/CodeGen/X86/sse-builtins.c clang/test/CodeGen/X86/sse2-builtins.c Removed: ################################################################################ diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index b4cc4c257edc1..0776426c95d63 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -71,7 +71,7 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { } foreach Cmp = ["cmpeq", "cmplt", "cmple", "cmpunord", "cmpneq", "cmpnlt", - "cmpnle", "cmpord", "min", "max"] in { + "cmpnle", "cmpord"] in { let Features = "sse" in { def Cmp#ps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">; def Cmp#ss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">; @@ -82,6 +82,15 @@ let Attributes = [Const, 
NoThrow, RequiredVectorWidth<128>] in { } } + foreach Op = ["min", "max"] in { + let Features = "sse" in { + def Op#ss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">; + } + let Features = "sse2" in { + def Op#sd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">; + } + } + let Features = "sse" in { def cmpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">; def cmpss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">; @@ -149,6 +158,8 @@ let Features = "sse", Header = "xmmintrin.h", Attributes = [NoThrow, RequireDecl let Features = "sse", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def shufps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">; + def minps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">; + def maxps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">; } let Features = "sse", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { @@ -237,6 +248,9 @@ let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi def pshufhw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">; def shufpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">; + def minpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">; + def maxpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">; + def psllwi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, int)">; def pslldi128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int)">; def psllqi128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, int)">; @@ -451,6 +465,10 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid def vpermilvarps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, int>)">; def vpermilvarpd256 
: X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>)">; def vpermilvarps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">; + def maxpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">; + def maxps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">; + def minpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">; + def minps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">; } let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { @@ -465,10 +483,6 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in def cvttpd2dq256 : X86Builtin<"_Vector<4, int>(_Vector<4, double>)">; def cvtpd2dq256 : X86Builtin<"_Vector<4, int>(_Vector<4, double>)">; def cvttps2dq256 : X86Builtin<"_Vector<8, int>(_Vector<8, float>)">; - foreach Op = ["max", "min"] in { - def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">; - def Op#ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">; - } } let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { @@ -1009,10 +1023,6 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512> def cvtpd2dq512_mask : X86Builtin<"_Vector<8, int>(_Vector<8, double>, _Vector<8, int>, unsigned char, _Constant int)">; def cvtps2udq512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, float>, _Vector<16, int>, unsigned short, _Constant int)">; def cvtpd2udq512_mask : X86Builtin<"_Vector<8, int>(_Vector<8, double>, _Vector<8, int>, unsigned char, _Constant int)">; - def minps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Constant int)">; - def minpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int)">; - def maxps512 : X86Builtin<"_Vector<16, float>(_Vector<16, 
float>, _Vector<16, float>, _Constant int)">; - def maxpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int)">; def cvtdq2ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, int>, _Vector<16, float>, unsigned short, _Constant int)">; def cvtudq2ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, int>, _Vector<16, float>, unsigned short, _Constant int)">; def vcvtps2ph512_mask : X86Builtin<"_Vector<16, short>(_Vector<16, float>, _Constant int, _Vector<16, short>, unsigned short)">; @@ -1023,6 +1033,10 @@ let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVecto def pmuldq512 : X86Builtin<"_Vector<8, long long int>(_Vector<16, int>, _Vector<16, int>)">; def pmuludq512 : X86Builtin<"_Vector<8, long long int>(_Vector<16, int>, _Vector<16, int>)">; def pshufd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">; + def minps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Constant int)">; + def minpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int)">; + def maxps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Constant int)">; + def maxpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int)">; } let Features = "avx512f", Attributes = [NoThrow, RequiredVectorWidth<512>] in { @@ -3382,23 +3396,20 @@ let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<5 def subph512 : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Constant int)">; def mulph512 : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Constant int)">; def divph512 : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Constant int)">; +} + +let Features = "avx512fp16", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def maxph512 : 
X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Constant int)">; def minph512 : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Constant int)">; } -let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def minph256 : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>)">; -} - -let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def minph128 : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>)">; -} - -let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { def maxph256 : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>)">; } -let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { + def minph128 : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>)">; def maxph128 : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 3a68ff0e458b4..c6d4a9d63b383 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2452,6 +2452,46 @@ static bool interp__builtin_elementwise_int_unaryop( return true; } +static bool interp__builtin_elementwise_fp_binop( + InterpState &S, CodePtr OpPC, const CallExpr *Call, + llvm::function_ref<APFloat(const APFloat &, const APFloat &, + std::optional<APSInt> RoundingMode)> + Fn) { + assert((Call->getNumArgs() == 2) || (Call->getNumArgs() == 3)); + const auto *VT = 
Call->getArg(0)->getType()->castAs<VectorType>(); + assert(VT->getElementType()->isFloatingType()); + unsigned NumElems = VT->getNumElements(); + + // Vector case. + assert(Call->getArg(0)->getType()->isVectorType() && + Call->getArg(1)->getType()->isVectorType()); + assert(VT->getElementType() == + Call->getArg(1)->getType()->castAs<VectorType>()->getElementType()); + assert(VT->getNumElements() == + Call->getArg(1)->getType()->castAs<VectorType>()->getNumElements()); + + std::optional<APSInt> RoundingMode = std::nullopt; + if (Call->getNumArgs() == 3) + RoundingMode = popToAPSInt(S, Call->getArg(2)); + + const Pointer &BPtr = S.Stk.pop<Pointer>(); + const Pointer &APtr = S.Stk.pop<Pointer>(); + const Pointer &Dst = S.Stk.peek<Pointer>(); + for (unsigned ElemIdx = 0; ElemIdx != NumElems; ++ElemIdx) { + using T = PrimConv<PT_Float>::T; + APFloat ElemA = APtr.elem<T>(ElemIdx).getAPFloat(); + APFloat ElemB = BPtr.elem<T>(ElemIdx).getAPFloat(); + if (ElemA.isNaN() || ElemA.isInfinity() || ElemA.isDenormal() || + ElemB.isNaN() || ElemB.isInfinity() || ElemB.isDenormal()) + return false; + Dst.elem<T>(ElemIdx) = static_cast<T>(Fn(ElemA, ElemB, RoundingMode)); + } + + Dst.initializeAllElements(); + + return true; +} + static bool interp__builtin_elementwise_int_binop( InterpState &S, CodePtr OpPC, const CallExpr *Call, llvm::function_ref<APInt(const APSInt &, const APSInt &)> Fn) { @@ -5719,6 +5759,40 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, }); } + case clang::X86::BI__builtin_ia32_minps: + case clang::X86::BI__builtin_ia32_minpd: + case clang::X86::BI__builtin_ia32_minph128: + case clang::X86::BI__builtin_ia32_minph256: + case clang::X86::BI__builtin_ia32_minps256: + case clang::X86::BI__builtin_ia32_minpd256: + case clang::X86::BI__builtin_ia32_minps512: + case clang::X86::BI__builtin_ia32_minpd512: + case clang::X86::BI__builtin_ia32_minph512: + return interp__builtin_elementwise_fp_binop( + S, OpPC, Call, + [](const APFloat &A, 
const APFloat &B, std::optional<APSInt>) { + if (A.isZero() && B.isZero()) + return B; + return llvm::minimum(A, B); + }); + + case clang::X86::BI__builtin_ia32_maxps: + case clang::X86::BI__builtin_ia32_maxpd: + case clang::X86::BI__builtin_ia32_maxph128: + case clang::X86::BI__builtin_ia32_maxph256: + case clang::X86::BI__builtin_ia32_maxps256: + case clang::X86::BI__builtin_ia32_maxpd256: + case clang::X86::BI__builtin_ia32_maxps512: + case clang::X86::BI__builtin_ia32_maxpd512: + case clang::X86::BI__builtin_ia32_maxph512: + return interp__builtin_elementwise_fp_binop( + S, OpPC, Call, + [](const APFloat &A, const APFloat &B, std::optional<APSInt>) { + if (A.isZero() && B.isZero()) + return B; + return llvm::maximum(A, B); + }); + default: S.FFDiag(S.Current->getLocation(OpPC), diag::note_invalid_subexpr_in_const_expr) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 8618979d1eba0..ec7ca893fb50e 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12280,6 +12280,42 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), SourceLen), E); }; + auto EvaluateFpBinOpExpr = + [&](llvm::function_ref<APFloat(const APFloat &, const APFloat &, + std::optional<APSInt>)> + Fn) { + assert(E->getNumArgs() == 2 || E->getNumArgs() == 3); + APValue A, B; + if (!EvaluateAsRValue(Info, E->getArg(0), A) || + !EvaluateAsRValue(Info, E->getArg(1), B)) + return false; + + assert(A.isVector() && B.isVector()); + assert(A.getVectorLength() == B.getVectorLength()); + + std::optional<APSInt> RoundingMode; + if (E->getNumArgs() == 3) { + APSInt Imm; + if (!EvaluateInteger(E->getArg(2), Imm, Info)) + return false; + RoundingMode = Imm; + } + + unsigned NumElems = A.getVectorLength(); + SmallVector<APValue, 4> ResultElements; + ResultElements.reserve(NumElems); + + for (unsigned EltNum = 0; EltNum < NumElems; ++EltNum) { + const APFloat &EltA = 
A.getVectorElt(EltNum).getFloat(); + const APFloat &EltB = B.getVectorElt(EltNum).getFloat(); + if (EltA.isNaN() || EltA.isInfinity() || EltA.isDenormal() || + EltB.isNaN() || EltB.isInfinity() || EltB.isDenormal()) + return false; + ResultElements.push_back(APValue(Fn(EltA, EltB, RoundingMode))); + } + return Success(APValue(ResultElements.data(), NumElems), E); + }; + auto EvalSelectScalar = [&](unsigned Len) -> bool { APSInt Mask; APValue AVal, WVal; @@ -14348,6 +14384,38 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(R, E); } + case clang::X86::BI__builtin_ia32_minps: + case clang::X86::BI__builtin_ia32_minpd: + case clang::X86::BI__builtin_ia32_minps256: + case clang::X86::BI__builtin_ia32_minpd256: + case clang::X86::BI__builtin_ia32_minps512: + case clang::X86::BI__builtin_ia32_minpd512: + case clang::X86::BI__builtin_ia32_minph128: + case clang::X86::BI__builtin_ia32_minph256: + case clang::X86::BI__builtin_ia32_minph512: + return EvaluateFpBinOpExpr( + [](const APFloat &A, const APFloat &B, std::optional<APSInt>) { + if (A.isZero() && B.isZero()) + return B; + return llvm::minimum(A, B); + }); + + case clang::X86::BI__builtin_ia32_maxps: + case clang::X86::BI__builtin_ia32_maxpd: + case clang::X86::BI__builtin_ia32_maxps256: + case clang::X86::BI__builtin_ia32_maxpd256: + case clang::X86::BI__builtin_ia32_maxps512: + case clang::X86::BI__builtin_ia32_maxpd512: + case clang::X86::BI__builtin_ia32_maxph128: + case clang::X86::BI__builtin_ia32_maxph256: + case clang::X86::BI__builtin_ia32_maxph512: + return EvaluateFpBinOpExpr( + [](const APFloat &A, const APFloat &B, std::optional<APSInt>) { + if (A.isZero() && B.isZero()) + return B; + return llvm::maximum(A, B); + }); + case clang::X86::BI__builtin_ia32_vcvtps2ph: case clang::X86::BI__builtin_ia32_vcvtps2ph256: { APValue SrcVec; diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index e03e8689d3f8a..942ed72686740 100644 --- 
a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -932,24 +932,21 @@ _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B) { (__v8df)_mm512_max_round_pd((A), (B), (R)), \ (__v8df)_mm512_setzero_pd())) -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_max_pd(__m512d __A, __m512d __B) -{ +static __inline__ __m512d + __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_pd(__m512d __A, __m512d __B) { return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_max_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_max_pd(__A, __B), (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_max_pd(__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_max_pd(__A, __B), (__v8df)_mm512_setzero_pd()); @@ -969,31 +966,30 @@ _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B) (__v16sf)_mm512_max_round_ps((A), (B), (R)), \ (__v16sf)_mm512_setzero_ps())) -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_max_ps(__m512 __A, __m512 __B) -{ +static __inline__ __m512 + __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_ps(__m512 __A, __m512 __B) { return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_max_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512(__U, 
(__v16sf)_mm512_max_ps(__A, __B), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_max_ps(__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_max_ps(__A, __B), (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ss(__m128 __W, + __mmask8 __U, + __m128 __A, + __m128 __B) { return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A, (__v4sf) __B, (__v4sf) __W, @@ -1001,8 +997,9 @@ _mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ss(__mmask8 __U, + __m128 __A, + __m128 __B) { return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A, (__v4sf) __B, (__v4sf) _mm_setzero_ps (), @@ -1028,8 +1025,10 @@ _mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) { (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R))) -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_sd(__m128d __W, + __mmask8 __U, + __m128d __A, + __m128d __B) { return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A, (__v2df) __B, (__v2df) __W, @@ -1037,8 +1036,9 @@ _mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_sd(__mmask8 __U, + __m128d __A, + __m128d __B) { return (__m128d) 
__builtin_ia32_maxsd_round_mask ((__v2df) __A, (__v2df) __B, (__v2df) _mm_setzero_pd (), @@ -1154,24 +1154,21 @@ _mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B) { (__v8df)_mm512_min_round_pd((A), (B), (R)), \ (__v8df)_mm512_setzero_pd())) -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_min_pd(__m512d __A, __m512d __B) -{ +static __inline__ __m512d + __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_pd(__m512d __A, __m512d __B) { return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_min_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_min_pd(__A, __B), (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_min_pd(__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_min_pd(__A, __B), (__v8df)_mm512_setzero_pd()); @@ -1191,31 +1188,30 @@ _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B) (__v16sf)_mm512_min_round_ps((A), (B), (R)), \ (__v16sf)_mm512_setzero_ps())) -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_min_ps(__m512 __A, __m512 __B) -{ +static __inline__ __m512 + __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_ps(__m512 __A, __m512 __B) { return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_min_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return 
(__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_min_ps(__A, __B), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_min_ps(__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_min_ps(__A, __B), (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ss(__m128 __W, + __mmask8 __U, + __m128 __A, + __m128 __B) { return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A, (__v4sf) __B, (__v4sf) __W, @@ -1223,8 +1219,9 @@ _mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ss(__mmask8 __U, + __m128 __A, + __m128 __B) { return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A, (__v4sf) __B, (__v4sf) _mm_setzero_ps (), @@ -1250,8 +1247,10 @@ _mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) { (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R))) -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_sd(__m128d __W, + __mmask8 __U, + __m128d __A, + __m128d __B) { return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A, (__v2df) __B, (__v2df) __W, @@ -1259,8 +1258,9 @@ _mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_sd(__mmask8 __U, + __m128d __A, 
+ __m128d __B) { return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A, (__v2df) __B, (__v2df) _mm_setzero_pd (), diff --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h index 050aefbb0f769..9a1d1930f66b6 100644 --- a/clang/lib/Headers/avx512fp16intrin.h +++ b/clang/lib/Headers/avx512fp16intrin.h @@ -499,19 +499,19 @@ _mm512_maskz_div_ph(__mmask32 __U, __m512h __A, __m512h __B) { (__mmask32)(U), (__v32hf)_mm512_div_round_ph((A), (B), (R)), \ (__v32hf)_mm512_setzero_ph())) -static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_min_ph(__m512h __A, - __m512h __B) { +static __inline__ __m512h + __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_ph(__m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_minph512((__v32hf)__A, (__v32hf)__B, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512h __DEFAULT_FN_ATTRS512 +static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_min_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_selectph_512( (__mmask32)__U, (__v32hf)_mm512_min_ph(__A, __B), (__v32hf)__W); } -static __inline__ __m512h __DEFAULT_FN_ATTRS512 +static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_min_ph(__mmask32 __U, __m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U, (__v32hf)_mm512_min_ph(__A, __B), @@ -532,19 +532,19 @@ _mm512_maskz_min_ph(__mmask32 __U, __m512h __A, __m512h __B) { (__mmask32)(U), (__v32hf)_mm512_min_round_ph((A), (B), (R)), \ (__v32hf)_mm512_setzero_ph())) -static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_max_ph(__m512h __A, - __m512h __B) { +static __inline__ __m512h + __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_ph(__m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_maxph512((__v32hf)__A, (__v32hf)__B, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512h __DEFAULT_FN_ATTRS512 +static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_max_ph(__m512h __W, __mmask32 __U, 
__m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_selectph_512( (__mmask32)__U, (__v32hf)_mm512_max_ph(__A, __B), (__v32hf)__W); } -static __inline__ __m512h __DEFAULT_FN_ATTRS512 +static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_max_ph(__mmask32 __U, __m512h __A, __m512h __B) { return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U, (__v32hf)_mm512_max_ph(__A, __B), diff --git a/clang/lib/Headers/avx512vlfp16intrin.h b/clang/lib/Headers/avx512vlfp16intrin.h index 7a762e105e9af..4f9c7cb79e000 100644 --- a/clang/lib/Headers/avx512vlfp16intrin.h +++ b/clang/lib/Headers/avx512vlfp16intrin.h @@ -249,12 +249,12 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_div_ph(__mmask8 __U, (__v8hf)_mm_setzero_ph()); } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_min_ph(__m256h __A, - __m256h __B) { +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_min_ph(__m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B); } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_min_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, @@ -262,7 +262,7 @@ _mm256_mask_min_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { (__v16hf)__W); } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_min_ph(__mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, @@ -270,34 +270,31 @@ _mm256_maskz_min_ph(__mmask16 __U, __m256h __A, __m256h __B) { (__v16hf)_mm256_setzero_ph()); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_min_ph(__m128h __A, - __m128h __B) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_min_ph(__m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B); } -static 
__inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_min_ph(__m128h __W, - __mmask8 __U, - __m128h __A, - __m128h __B) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_min_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B), (__v8hf)__W); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_min_ph(__mmask8 __U, - __m128h __A, - __m128h __B) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_min_ph(__mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B), (__v8hf)_mm_setzero_ph()); } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_max_ph(__m256h __A, - __m256h __B) { +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_max_ph(__m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B); } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_max_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, @@ -305,7 +302,7 @@ _mm256_mask_max_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { (__v16hf)__W); } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 +static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_max_ph(__mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, @@ -313,23 +310,20 @@ _mm256_maskz_max_ph(__mmask16 __U, __m256h __A, __m256h __B) { (__v16hf)_mm256_setzero_ph()); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_max_ph(__m128h __A, - __m128h __B) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_max_ph(__m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B); } 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_max_ph(__m128h __W, - __mmask8 __U, - __m128h __A, - __m128h __B) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_max_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B), (__v8hf)__W); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_max_ph(__mmask8 __U, - __m128h __A, - __m128h __B) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_max_ph(__mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B), (__v8hf)_mm_setzero_ph()); diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index 6e2b19c8a2f7b..ea43046240cc0 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -2597,112 +2597,112 @@ _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) { (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_max_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_max_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_max_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 +static 
__inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_max_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_max_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_max_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_max_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_max_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_min_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_min_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 +static 
__inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_min_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_min_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_min_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_min_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_min_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_min_ps(__A, __B), diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 9b45bc3e56bdb..fbd20e58329a3 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -219,9 +219,8 @@ static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_div_ps(__m256 __a, /// A 256-bit vector of [4 x double] containing one of the operands. 
/// \returns A 256-bit vector of [4 x double] containing the maximum values /// between both operands. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_max_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_max_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_maxpd256((__v4df)__a, (__v4df)__b); } @@ -240,9 +239,8 @@ _mm256_max_pd(__m256d __a, __m256d __b) /// A 256-bit vector of [8 x float] containing one of the operands. /// \returns A 256-bit vector of [8 x float] containing the maximum values /// between both operands. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_max_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_max_ps(__m256 __a, + __m256 __b) { return (__m256)__builtin_ia32_maxps256((__v8sf)__a, (__v8sf)__b); } @@ -261,9 +259,8 @@ _mm256_max_ps(__m256 __a, __m256 __b) /// A 256-bit vector of [4 x double] containing one of the operands. /// \returns A 256-bit vector of [4 x double] containing the minimum values /// between both operands. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_min_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_min_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_minpd256((__v4df)__a, (__v4df)__b); } @@ -282,9 +279,8 @@ _mm256_min_pd(__m256d __a, __m256d __b) /// A 256-bit vector of [8 x float] containing one of the operands. /// \returns A 256-bit vector of [8 x float] containing the minimum values /// between both operands. 
-static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_min_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_min_ps(__m256 __a, + __m256 __b) { return (__m256)__builtin_ia32_minps256((__v8sf)__a, (__v8sf)__b); } diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 1ca7097cd170a..61b35e97314fd 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -300,8 +300,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_sd(__m128d __a, /// A 128-bit vector of [2 x double] containing one of the operands. /// \returns A 128-bit vector of [2 x double] containing the minimum values /// between both operands. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_pd(__m128d __a, - __m128d __b) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_min_pd(__m128d __a, + __m128d __b) { return __builtin_ia32_minpd((__v2df)__a, (__v2df)__b); } @@ -346,8 +346,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_sd(__m128d __a, /// A 128-bit vector of [2 x double] containing one of the operands. /// \returns A 128-bit vector of [2 x double] containing the maximum values /// between both operands. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_pd(__m128d __a, - __m128d __b) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_max_pd(__m128d __a, + __m128d __b) { return __builtin_ia32_maxpd((__v2df)__a, (__v2df)__b); } diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index 72a643948bed6..ab0f0c1690759 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -341,9 +341,7 @@ _mm_rsqrt_ps(__m128 __a) /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the /// minimum value between both operands. The upper 96 bits are copied from /// the upper 96 bits of the first source operand. 
-static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_min_ss(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_min_ss(__m128 __a, __m128 __b) { return __builtin_ia32_minss((__v4sf)__a, (__v4sf)__b); } @@ -362,9 +360,8 @@ _mm_min_ss(__m128 __a, __m128 __b) /// A 128-bit vector of [4 x float] containing one of the operands. /// \returns A 128-bit vector of [4 x float] containing the minimum values /// between both operands. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_min_ps(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_min_ps(__m128 __a, + __m128 __b) { return __builtin_ia32_minps((__v4sf)__a, (__v4sf)__b); } @@ -387,9 +384,7 @@ _mm_min_ps(__m128 __a, __m128 __b) /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the /// maximum value between both operands. The upper 96 bits are copied from /// the upper 96 bits of the first source operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_max_ss(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_max_ss(__m128 __a, __m128 __b) { return __builtin_ia32_maxss((__v4sf)__a, (__v4sf)__b); } @@ -408,9 +403,8 @@ _mm_max_ss(__m128 __a, __m128 __b) /// A 128-bit vector of [4 x float] containing one of the operands. /// \returns A 128-bit vector of [4 x float] containing the maximum values /// between both operands. 
-static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_max_ps(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_max_ps(__m128 __a, + __m128 __b) { return __builtin_ia32_maxps((__v4sf)__a, (__v4sf)__b); } diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index d2f8740cffbbd..6ed4217231119 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -1313,24 +1313,34 @@ __m256d test_mm256_max_pd(__m256d A, __m256d B) { // CHECK: call {{.*}}<4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_max_pd(A, B); } +TEST_CONSTEXPR(match_m256d(_mm256_max_pd((__m256d){+1.0, +2.0, +3.0, +4.0}, (__m256d){+4.0, +3.0, +2.0, +1.0}), +4.0, +3.0, +3.0, +4.0)); +TEST_CONSTEXPR(match_m256d(_mm256_max_pd((__m256d){+0.0, -0.0, +0.0, -0.0}, (__m256d){-0.0, +0.0, -0.0, +0.0}), -0.0, +0.0, -0.0, +0.0)); +TEST_CONSTEXPR(match_m256d(_mm256_max_pd((__m256d){-1.0, -2.0, +3.0, -4.0}, (__m256d){+1.0, -3.0, +2.0, -5.0}), +1.0, -2.0, +3.0, -4.0)); __m256 test_mm256_max_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_max_ps // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}) return _mm256_max_ps(A, B); } +TEST_CONSTEXPR(match_m256(_mm256_max_ps((__m256){+1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f}, (__m256){+8.0f, +7.0f, +6.0f, +5.0f, +4.0f, +3.0f, +2.0f, +1.0f}), +8.0f, +7.0f, +6.0f, +5.0f, +5.0f, +6.0f, +7.0f, +8.0f)); +TEST_CONSTEXPR(match_m256(_mm256_max_ps((__m256){+0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f}, (__m256){-0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f}), -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f)); __m256d test_mm256_min_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_min_pd // CHECK: call {{.*}}<4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_min_pd(A, B); } 
+TEST_CONSTEXPR(match_m256d(_mm256_min_pd((__m256d){+1.0, +2.0, +3.0, +4.0}, (__m256d){+4.0, +3.0, +2.0, +1.0}), +1.0, +2.0, +2.0, +1.0)); +TEST_CONSTEXPR(match_m256d(_mm256_min_pd((__m256d){+0.0, -0.0, +0.0, -0.0}, (__m256d){-0.0, +0.0, -0.0, +0.0}), -0.0, +0.0, -0.0, +0.0)); +TEST_CONSTEXPR(match_m256d(_mm256_min_pd((__m256d){-1.0, -2.0, +3.0, -4.0}, (__m256d){+1.0, -3.0, +2.0, -5.0}), -1.0, -3.0, +2.0, -5.0)); __m256 test_mm256_min_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_min_ps // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}) return _mm256_min_ps(A, B); } +TEST_CONSTEXPR(match_m256(_mm256_min_ps((__m256){+1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f}, (__m256){+8.0f, +7.0f, +6.0f, +5.0f, +4.0f, +3.0f, +2.0f, +1.0f}), +1.0f, +2.0f, +3.0f, +4.0f, +4.0f, +3.0f, +2.0f, +1.0f)); +TEST_CONSTEXPR(match_m256(_mm256_min_ps((__m256){+0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f}, (__m256){-0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f}), -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f)); __m256d test_mm256_movedup_pd(__m256d A) { // CHECK-LABEL: test_mm256_movedup_pd diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index f78b28d6da1b1..9366f847b3b71 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -10865,6 +10865,7 @@ __m512d test_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_max_pd (__W,__U,__A,__B); } +TEST_CONSTEXPR(match_m512d(_mm512_mask_max_pd((__m512d){100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0}, 0xF0, (__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0}), 100.0, 100.0, 100.0, 100.0, 5.0, 6.0, 7.0, 8.0)); __m512d test_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B) { @@ -10873,6 
+10874,7 @@ __m512d test_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B) // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_max_pd (__U,__A,__B); } +TEST_CONSTEXPR(match_m512d(_mm512_maskz_max_pd(0xF0, (__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0}), 0.0, 0.0, 0.0, 0.0, 5.0, 6.0, 7.0, 8.0)); __m512 test_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { @@ -10881,6 +10883,7 @@ __m512 test_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_max_ps (__W,__U,__A,__B); } +TEST_CONSTEXPR(match_m512(_mm512_mask_max_ps((__m512){100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f}, 0xFF00, (__m512){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}, (__m512){16.0f, 15.0f, 14.0f, 13.0f, 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f}), 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f)); __m512d test_mm512_mask_max_round_pd(__m512d __W,__mmask8 __U,__m512d __A,__m512d __B) { @@ -10904,6 +10907,8 @@ __m512d test_mm512_max_round_pd(__m512d __A,__m512d __B) // CHECK: @llvm.x86.avx512.max.pd.512 return _mm512_max_round_pd(__A,__B,_MM_FROUND_NO_EXC); } +TEST_CONSTEXPR(match_m512d(_mm512_max_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0}), 8.0, 7.0, 6.0, 5.0, 5.0, 6.0, 7.0, 8.0)); +TEST_CONSTEXPR(match_m512d(_mm512_max_pd((__m512d){+0.0, -0.0, +0.0, -0.0, +0.0, -0.0, +0.0, -0.0}, (__m512d){-0.0, +0.0, -0.0, +0.0, -0.0, +0.0, -0.0, +0.0}), -0.0, +0.0, -0.0, +0.0, -0.0, +0.0, -0.0, +0.0)); __m512 test_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B) { @@ 
-10912,6 +10917,7 @@ __m512 test_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B) // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_max_ps (__U,__A,__B); } +TEST_CONSTEXPR(match_m512(_mm512_maskz_max_ps(0xFF00, (__m512){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}, (__m512){16.0f, 15.0f, 14.0f, 13.0f, 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f}), 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f)); __m512 test_mm512_mask_max_round_ps(__m512 __W,__mmask16 __U,__m512 __A,__m512 __B) { @@ -10935,6 +10941,8 @@ __m512 test_mm512_max_round_ps(__m512 __A,__m512 __B) // CHECK: @llvm.x86.avx512.max.ps.512 return _mm512_max_round_ps(__A,__B,_MM_FROUND_NO_EXC); } +TEST_CONSTEXPR(match_m512(_mm512_max_ps((__m512){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}, (__m512){16.0f, 15.0f, 14.0f, 13.0f, 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f}), 16.0f, 15.0f, 14.0f, 13.0f, 12.0f, 11.0f, 10.0f, 9.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f)); +TEST_CONSTEXPR(match_m512(_mm512_max_ps((__m512){+0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f}, (__m512){-0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f}), -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f)); __m512d test_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { @@ -10943,6 +10951,7 @@ __m512d test_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_min_pd (__W,__U,__A,__B); } 
+TEST_CONSTEXPR(match_m512d(_mm512_mask_min_pd((__m512d){100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0}, 0xF0, (__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0}), 100.0, 100.0, 100.0, 100.0, 4.0, 3.0, 2.0, 1.0)); __m512d test_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B) { @@ -10950,6 +10959,7 @@ __m512d test_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B) // CHECK: @llvm.x86.avx512.min.pd.512 return _mm512_maskz_min_pd (__U,__A,__B); } +TEST_CONSTEXPR(match_m512d(_mm512_maskz_min_pd(0xF0, (__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0}), 0.0, 0.0, 0.0, 0.0, 4.0, 3.0, 2.0, 1.0)); __m512d test_mm512_mask_min_round_pd(__m512d __W,__mmask8 __U,__m512d __A,__m512d __B) { @@ -10973,6 +10983,8 @@ __m512d test_mm512_min_round_pd( __m512d __A,__m512d __B) // CHECK: @llvm.x86.avx512.min.pd.512 return _mm512_min_round_pd(__A,__B,_MM_FROUND_NO_EXC); } +TEST_CONSTEXPR(match_m512d(_mm512_min_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0}), 1.0, 2.0, 3.0, 4.0, 4.0, 3.0, 2.0, 1.0)); +TEST_CONSTEXPR(match_m512d(_mm512_min_pd((__m512d){+0.0, -0.0, +0.0, -0.0, +0.0, -0.0, +0.0, -0.0}, (__m512d){-0.0, +0.0, -0.0, +0.0, -0.0, +0.0, -0.0, +0.0}), -0.0, +0.0, -0.0, +0.0, -0.0, +0.0, -0.0, +0.0)); __m512 test_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { @@ -10981,6 +10993,7 @@ __m512 test_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_min_ps (__W,__U,__A,__B); } +TEST_CONSTEXPR(match_m512(_mm512_mask_min_ps((__m512){100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f}, 0xFF00, (__m512){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 
15.0f, 16.0f}, (__m512){16.0f, 15.0f, 14.0f, 13.0f, 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f}), 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f)); __m512 test_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B) { @@ -10989,6 +11002,7 @@ __m512 test_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B) // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_min_ps (__U,__A,__B); } +TEST_CONSTEXPR(match_m512(_mm512_maskz_min_ps(0xFF00, (__m512){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}, (__m512){16.0f, 15.0f, 14.0f, 13.0f, 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f}), 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f)); __m512 test_mm512_mask_min_round_ps(__m512 __W,__mmask16 __U,__m512 __A,__m512 __B) { @@ -11013,6 +11027,9 @@ __m512 test_mm512_min_round_ps(__m512 __A,__m512 __B) return _mm512_min_round_ps(__A,__B,_MM_FROUND_NO_EXC); } +TEST_CONSTEXPR(match_m512(_mm512_min_ps((__m512){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}, (__m512){16.0f, 15.0f, 14.0f, 13.0f, 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f}), 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f)); +TEST_CONSTEXPR(match_m512(_mm512_min_ps((__m512){+0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f}, (__m512){-0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f}), -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f)); + __m512 test_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A) { // CHECK-LABEL: 
test_mm512_mask_floor_ps diff --git a/clang/test/CodeGen/X86/avx512fp16-builtins.c b/clang/test/CodeGen/X86/avx512fp16-builtins.c index 6ea88e45421a4..dbd24d0899c60 100644 --- a/clang/test/CodeGen/X86/avx512fp16-builtins.c +++ b/clang/test/CodeGen/X86/avx512fp16-builtins.c @@ -662,6 +662,8 @@ __m512h test_mm512_min_ph(__m512h __A, __m512h __B) { // CHECK: @llvm.x86.avx512fp16.min.ph.512 return _mm512_min_ph(__A, __B); } +TEST_CONSTEXPR(match_m512h(_mm512_min_ph((__m512h){1.0f16, 2.0f16, 3.0f16, 4.0f16, 5.0f16, 6.0f16, 7.0f16, 8.0f16, 9.0f16, 10.0f16, 11.0f16, 12.0f16, 13.0f16, 14.0f16, 15.0f16, 16.0f16, 17.0f16, 18.0f16, 19.0f16, 20.0f16, 21.0f16, 22.0f16, 23.0f16, 24.0f16, 25.0f16, 26.0f16, 27.0f16, 28.0f16, 29.0f16, 30.0f16, 31.0f16, 32.0f16}, (__m512h){32.0f16, 31.0f16, 30.0f16, 29.0f16, 28.0f16, 27.0f16, 26.0f16, 25.0f16, 24.0f16, 23.0f16, 22.0f16, 21.0f16, 20.0f16, 19.0f16, 18.0f16, 17.0f16, 16.0f16, 15.0f16, 14.0f16, 13.0f16, 12.0f16, 11.0f16, 10.0f16, 9.0f16, 8.0f16, 7.0f16, 6.0f16, 5.0f16, 4.0f16, 3.0f16, 2.0f16, 1.0f16}), 1.0f16, 2.0f16, 3.0f16, 4.0f16, 5.0f16, 6.0f16, 7.0f16, 8.0f16, 9.0f16, 10.0f16, 11.0f16, 12.0f16, 13.0f16, 14.0f16, 15.0f16, 16.0f16, 16.0f16, 15.0f16, 14.0f16, 13.0f16, 12.0f16, 11.0f16, 10.0f16, 9.0f16, 8.0f16, 7.0f16, 6.0f16, 5.0f16, 4.0f16, 3.0f16, 2.0f16, 1.0f16)); +TEST_CONSTEXPR(match_m512h(_mm512_min_ph((__m512h){+0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16}, (__m512h){-0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16}), -0.0f16, +0.0f16, -0.0f16, +0.0f16, 
-0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16)); __m512h test_mm512_mask_min_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) { // CHECK-LABEL: test_mm512_mask_min_ph @@ -669,6 +671,7 @@ __m512h test_mm512_mask_min_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}} return (__m512h)_mm512_mask_min_ph(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_m512h(_mm512_mask_min_ph((__m512h){9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16}, (__mmask32)0xFFFF0000, (__m512h){1.0f16, 2.0f16, 3.0f16, 4.0f16, 5.0f16, 6.0f16, 7.0f16, 8.0f16, 9.0f16, 10.0f16, 11.0f16, 12.0f16, 13.0f16, 14.0f16, 15.0f16, 16.0f16, 17.0f16, 18.0f16, 19.0f16, 20.0f16, 21.0f16, 22.0f16, 23.0f16, 24.0f16, 25.0f16, 26.0f16, 27.0f16, 28.0f16, 29.0f16, 30.0f16, 31.0f16, 32.0f16}, (__m512h){32.0f16, 31.0f16, 30.0f16, 29.0f16, 28.0f16, 27.0f16, 26.0f16, 25.0f16, 24.0f16, 23.0f16, 22.0f16, 21.0f16, 20.0f16, 19.0f16, 18.0f16, 17.0f16, 16.0f16, 15.0f16, 14.0f16, 13.0f16, 12.0f16, 11.0f16, 10.0f16, 9.0f16, 8.0f16, 7.0f16, 6.0f16, 5.0f16, 4.0f16, 3.0f16, 2.0f16, 1.0f16}), 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 16.0f16, 15.0f16, 14.0f16, 13.0f16, 12.0f16, 11.0f16, 10.0f16, 9.0f16, 8.0f16, 7.0f16, 6.0f16, 5.0f16, 4.0f16, 3.0f16, 2.0f16, 1.0f16)); __m512h test_mm512_maskz_min_ph(__mmask32 __U, __m512h __A, __m512h __B) { // CHECK-LABEL: test_mm512_maskz_min_ph @@ -676,6 +679,7 @@ __m512h test_mm512_maskz_min_ph(__mmask32 __U, __m512h __A, 
__m512h __B) { // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}} return _mm512_maskz_min_ph(__U, __A, __B); } +TEST_CONSTEXPR(match_m512h(_mm512_maskz_min_ph((__mmask32)0xFFFF0000, (__m512h){1.0f16, 2.0f16, 3.0f16, 4.0f16, 5.0f16, 6.0f16, 7.0f16, 8.0f16, 9.0f16, 10.0f16, 11.0f16, 12.0f16, 13.0f16, 14.0f16, 15.0f16, 16.0f16, 17.0f16, 18.0f16, 19.0f16, 20.0f16, 21.0f16, 22.0f16, 23.0f16, 24.0f16, 25.0f16, 26.0f16, 27.0f16, 28.0f16, 29.0f16, 30.0f16, 31.0f16, 32.0f16}, (__m512h){32.0f16, 31.0f16, 30.0f16, 29.0f16, 28.0f16, 27.0f16, 26.0f16, 25.0f16, 24.0f16, 23.0f16, 22.0f16, 21.0f16, 20.0f16, 19.0f16, 18.0f16, 17.0f16, 16.0f16, 15.0f16, 14.0f16, 13.0f16, 12.0f16, 11.0f16, 10.0f16, 9.0f16, 8.0f16, 7.0f16, 6.0f16, 5.0f16, 4.0f16, 3.0f16, 2.0f16, 1.0f16}), 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 16.0f16, 15.0f16, 14.0f16, 13.0f16, 12.0f16, 11.0f16, 10.0f16, 9.0f16, 8.0f16, 7.0f16, 6.0f16, 5.0f16, 4.0f16, 3.0f16, 2.0f16, 1.0f16)); __m512h test_mm512_min_round_ph(__m512h __A, __m512h __B) { // CHECK-LABEL: test_mm512_min_round_ph @@ -701,6 +705,8 @@ __m512h test_mm512_max_ph(__m512h __A, __m512h __B) { return _mm512_max_ph(__A, __B); } +TEST_CONSTEXPR(match_m512h(_mm512_max_ph((__m512h){1.0f16, 2.0f16, 3.0f16, 4.0f16, 5.0f16, 6.0f16, 7.0f16, 8.0f16, 9.0f16, 10.0f16, 11.0f16, 12.0f16, 13.0f16, 14.0f16, 15.0f16, 16.0f16, 17.0f16, 18.0f16, 19.0f16, 20.0f16, 21.0f16, 22.0f16, 23.0f16, 24.0f16, 25.0f16, 26.0f16, 27.0f16, 28.0f16, 29.0f16, 30.0f16, 31.0f16, 32.0f16}, (__m512h){32.0f16, 31.0f16, 30.0f16, 29.0f16, 28.0f16, 27.0f16, 26.0f16, 25.0f16, 24.0f16, 23.0f16, 22.0f16, 21.0f16, 20.0f16, 19.0f16, 18.0f16, 17.0f16, 16.0f16, 15.0f16, 14.0f16, 13.0f16, 12.0f16, 11.0f16, 10.0f16, 9.0f16, 8.0f16, 7.0f16, 6.0f16, 5.0f16, 4.0f16, 3.0f16, 2.0f16, 1.0f16}), 32.0f16, 31.0f16, 30.0f16, 29.0f16, 28.0f16, 27.0f16, 26.0f16, 25.0f16, 24.0f16, 23.0f16, 22.0f16, 21.0f16, 
20.0f16, 19.0f16, 18.0f16, 17.0f16, 17.0f16, 18.0f16, 19.0f16, 20.0f16, 21.0f16, 22.0f16, 23.0f16, 24.0f16, 25.0f16, 26.0f16, 27.0f16, 28.0f16, 29.0f16, 30.0f16, 31.0f16, 32.0f16)); +TEST_CONSTEXPR(match_m512h(_mm512_max_ph((__m512h){+0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16}, (__m512h){-0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16}), -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16)); __m512h test_mm512_mask_max_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) { // CHECK-LABEL: test_mm512_mask_max_ph @@ -708,6 +714,7 @@ __m512h test_mm512_mask_max_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}} return (__m512h)_mm512_mask_max_ph(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_m512h(_mm512_mask_max_ph((__m512h){9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16}, (__mmask32)0xFFFF0000, (__m512h){1.0f16, 2.0f16, 3.0f16, 4.0f16, 5.0f16, 6.0f16, 7.0f16, 8.0f16, 9.0f16, 10.0f16, 11.0f16, 12.0f16, 13.0f16, 14.0f16, 15.0f16, 16.0f16, 17.0f16, 18.0f16, 19.0f16, 20.0f16, 
21.0f16, 22.0f16, 23.0f16, 24.0f16, 25.0f16, 26.0f16, 27.0f16, 28.0f16, 29.0f16, 30.0f16, 31.0f16, 32.0f16}, (__m512h){32.0f16, 31.0f16, 30.0f16, 29.0f16, 28.0f16, 27.0f16, 26.0f16, 25.0f16, 24.0f16, 23.0f16, 22.0f16, 21.0f16, 20.0f16, 19.0f16, 18.0f16, 17.0f16, 16.0f16, 15.0f16, 14.0f16, 13.0f16, 12.0f16, 11.0f16, 10.0f16, 9.0f16, 8.0f16, 7.0f16, 6.0f16, 5.0f16, 4.0f16, 3.0f16, 2.0f16, 1.0f16}), 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 17.0f16, 18.0f16, 19.0f16, 20.0f16, 21.0f16, 22.0f16, 23.0f16, 24.0f16, 25.0f16, 26.0f16, 27.0f16, 28.0f16, 29.0f16, 30.0f16, 31.0f16, 32.0f16)); __m512h test_mm512_maskz_max_ph(__mmask32 __U, __m512h __A, __m512h __B) { // CHECK-LABEL: test_mm512_maskz_max_ph @@ -715,6 +722,7 @@ __m512h test_mm512_maskz_max_ph(__mmask32 __U, __m512h __A, __m512h __B) { // CHECK: select <32 x i1> %{{.*}}, <32 x half> %{{.*}}, <32 x half> %{{.*}} return _mm512_maskz_max_ph(__U, __A, __B); } +TEST_CONSTEXPR(match_m512h(_mm512_maskz_max_ph((__mmask32)0xFFFF0000, (__m512h){1.0f16, 2.0f16, 3.0f16, 4.0f16, 5.0f16, 6.0f16, 7.0f16, 8.0f16, 9.0f16, 10.0f16, 11.0f16, 12.0f16, 13.0f16, 14.0f16, 15.0f16, 16.0f16, 17.0f16, 18.0f16, 19.0f16, 20.0f16, 21.0f16, 22.0f16, 23.0f16, 24.0f16, 25.0f16, 26.0f16, 27.0f16, 28.0f16, 29.0f16, 30.0f16, 31.0f16, 32.0f16}, (__m512h){32.0f16, 31.0f16, 30.0f16, 29.0f16, 28.0f16, 27.0f16, 26.0f16, 25.0f16, 24.0f16, 23.0f16, 22.0f16, 21.0f16, 20.0f16, 19.0f16, 18.0f16, 17.0f16, 16.0f16, 15.0f16, 14.0f16, 13.0f16, 12.0f16, 11.0f16, 10.0f16, 9.0f16, 8.0f16, 7.0f16, 6.0f16, 5.0f16, 4.0f16, 3.0f16, 2.0f16, 1.0f16}), 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 17.0f16, 18.0f16, 19.0f16, 20.0f16, 21.0f16, 22.0f16, 23.0f16, 24.0f16, 25.0f16, 26.0f16, 27.0f16, 28.0f16, 29.0f16, 30.0f16, 31.0f16, 32.0f16)); __m512h test_mm512_max_round_ph(__m512h __A, __m512h __B) { // 
CHECK-LABEL: test_mm512_max_round_ph diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index 013c19ba7a929..cccd67927a286 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -4633,98 +4633,114 @@ __m128d test_mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) // CHECK-LABEL: test_mm_mask_max_pd // CHECK: @llvm.x86.sse2.max.pd // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} - return _mm_mask_max_pd(__W,__U,__A,__B); + return _mm_mask_max_pd(__W,__U,__A,__B); } +TEST_CONSTEXPR(match_m128d(_mm_mask_max_pd((__m128d){9.0, 9.0}, (__mmask8)0b01, (__m128d){1.0, 4.0}, (__m128d){2.0, 3.0}), 2.0, 9.0)); __m128d test_mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) { // CHECK-LABEL: test_mm_maskz_max_pd // CHECK: @llvm.x86.sse2.max.pd // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} - return _mm_maskz_max_pd(__U,__A,__B); + return _mm_maskz_max_pd(__U,__A,__B); } +TEST_CONSTEXPR(match_m128d(_mm_maskz_max_pd((__mmask8)0b01, (__m128d){1.0, 4.0}, (__m128d){2.0, 3.0}), 2.0, 0.0)); __m256d test_mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { // CHECK-LABEL: test_mm256_mask_max_pd // CHECK: @llvm.x86.avx.max.pd.256 // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_mask_max_pd(__W,__U,__A,__B); + return _mm256_mask_max_pd(__W,__U,__A,__B); } +TEST_CONSTEXPR(match_m256d(_mm256_mask_max_pd((__m256d){9.0, 9.0, 9.0, 9.0}, (__mmask8)0b0101, (__m256d){1.0, 4.0, 2.0, 5.0}, (__m256d){2.0, 3.0, 3.0, 4.0}), 2.0, 9.0, 3.0, 9.0)); __m256d test_mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) { // CHECK-LABEL: test_mm256_maskz_max_pd // CHECK: @llvm.x86.avx.max.pd.256 // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_maskz_max_pd(__U,__A,__B); + return _mm256_maskz_max_pd(__U,__A,__B); } 
+TEST_CONSTEXPR(match_m256d(_mm256_maskz_max_pd((__mmask8)0b0101, (__m256d){1.0, 4.0, 2.0, 5.0}, (__m256d){2.0, 3.0, 3.0, 4.0}), 2.0, 0.0, 3.0, 0.0)); __m128 test_mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { // CHECK-LABEL: test_mm_mask_max_ps // CHECK: @llvm.x86.sse.max.ps // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} - return _mm_mask_max_ps(__W,__U,__A,__B); + return _mm_mask_max_ps(__W,__U,__A,__B); } +TEST_CONSTEXPR(match_m128(_mm_mask_max_ps((__m128){9.0f, 9.0f, 9.0f, 9.0f}, (__mmask8)0b0101, (__m128){1.0f, 4.0f, 2.0f, 5.0f}, (__m128){2.0f, 3.0f, 3.0f, 4.0f}), 2.0f, 9.0f, 3.0f, 9.0f)); __m128 test_mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) { // CHECK-LABEL: test_mm_maskz_max_ps // CHECK: @llvm.x86.sse.max.ps // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} - return _mm_maskz_max_ps(__U,__A,__B); + return _mm_maskz_max_ps(__U,__A,__B); } +TEST_CONSTEXPR(match_m128(_mm_maskz_max_ps((__mmask8)0b0101, (__m128){1.0f, 4.0f, 2.0f, 5.0f}, (__m128){2.0f, 3.0f, 3.0f, 4.0f}), 2.0f, 0.0f, 3.0f, 0.0f)); __m256 test_mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { // CHECK-LABEL: test_mm256_mask_max_ps // CHECK: @llvm.x86.avx.max.ps.256 // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask_max_ps(__W,__U,__A,__B); + return _mm256_mask_max_ps(__W,__U,__A,__B); } +TEST_CONSTEXPR(match_m256(_mm256_mask_max_ps((__m256){9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f}, (__mmask8)0b01010101, (__m256){1.0f, 4.0f, 2.0f, 5.0f, 1.0f, 4.0f, 2.0f, 5.0f}, (__m256){2.0f, 3.0f, 3.0f, 4.0f, 2.0f, 3.0f, 3.0f, 4.0f}), 2.0f, 9.0f, 3.0f, 9.0f, 2.0f, 9.0f, 3.0f, 9.0f)); __m256 test_mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) { // CHECK-LABEL: test_mm256_maskz_max_ps // CHECK: @llvm.x86.avx.max.ps.256 // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_maskz_max_ps(__U,__A,__B); + return 
_mm256_maskz_max_ps(__U,__A,__B); } +TEST_CONSTEXPR(match_m256(_mm256_maskz_max_ps((__mmask8)0b01010101, (__m256){1.0f, 4.0f, 2.0f, 5.0f, 1.0f, 4.0f, 2.0f, 5.0f}, (__m256){2.0f, 3.0f, 3.0f, 4.0f, 2.0f, 3.0f, 3.0f, 4.0f}), 2.0f, 0.0f, 3.0f, 0.0f, 2.0f, 0.0f, 3.0f, 0.0f)); __m128d test_mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { // CHECK-LABEL: test_mm_mask_min_pd // CHECK: @llvm.x86.sse2.min.pd // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} - return _mm_mask_min_pd(__W,__U,__A,__B); + return _mm_mask_min_pd(__W,__U,__A,__B); } +TEST_CONSTEXPR(match_m128d(_mm_mask_min_pd((__m128d){9.0, 9.0}, (__mmask8)0b01, (__m128d){1.0, 4.0}, (__m128d){2.0, 3.0}), 1.0, 9.0)); __m128d test_mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) { // CHECK-LABEL: test_mm_maskz_min_pd // CHECK: @llvm.x86.sse2.min.pd // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} - return _mm_maskz_min_pd(__U,__A,__B); + return _mm_maskz_min_pd(__U,__A,__B); } +TEST_CONSTEXPR(match_m128d(_mm_maskz_min_pd((__mmask8)0b01, (__m128d){1.0, 4.0}, (__m128d){2.0, 3.0}), 1.0, 0.0)); __m256d test_mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { // CHECK-LABEL: test_mm256_mask_min_pd // CHECK: @llvm.x86.avx.min.pd.256 // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_mask_min_pd(__W,__U,__A,__B); + return _mm256_mask_min_pd(__W,__U,__A,__B); } +TEST_CONSTEXPR(match_m256d(_mm256_mask_min_pd((__m256d){9.0, 9.0, 9.0, 9.0}, (__mmask8)0b0101, (__m256d){1.0, 4.0, 2.0, 5.0}, (__m256d){2.0, 3.0, 3.0, 4.0}), 1.0, 9.0, 2.0, 9.0)); __m256d test_mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) { // CHECK-LABEL: test_mm256_maskz_min_pd // CHECK: @llvm.x86.avx.min.pd.256 // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_maskz_min_pd(__U,__A,__B); + return _mm256_maskz_min_pd(__U,__A,__B); } 
+TEST_CONSTEXPR(match_m256d(_mm256_maskz_min_pd((__mmask8)0b0101, (__m256d){1.0, 4.0, 2.0, 5.0}, (__m256d){2.0, 3.0, 3.0, 4.0}), 1.0, 0.0, 2.0, 0.0)); __m128 test_mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { // CHECK-LABEL: test_mm_mask_min_ps // CHECK: @llvm.x86.sse.min.ps // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} - return _mm_mask_min_ps(__W,__U,__A,__B); + return _mm_mask_min_ps(__W,__U,__A,__B); } +TEST_CONSTEXPR(match_m128(_mm_mask_min_ps((__m128){9.0f, 9.0f, 9.0f, 9.0f}, (__mmask8)0b0101, (__m128){1.0f, 4.0f, 2.0f, 5.0f}, (__m128){2.0f, 3.0f, 3.0f, 4.0f}), 1.0f, 9.0f, 2.0f, 9.0f)); __m128 test_mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) { // CHECK-LABEL: test_mm_maskz_min_ps // CHECK: @llvm.x86.sse.min.ps // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} - return _mm_maskz_min_ps(__U,__A,__B); + return _mm_maskz_min_ps(__U,__A,__B); } +TEST_CONSTEXPR(match_m128(_mm_maskz_min_ps((__mmask8)0b0101, (__m128){1.0f, 4.0f, 2.0f, 5.0f}, (__m128){2.0f, 3.0f, 3.0f, 4.0f}), 1.0f, 0.0f, 2.0f, 0.0f)); __m256 test_mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { // CHECK-LABEL: test_mm256_mask_min_ps // CHECK: @llvm.x86.avx.min.ps.256 // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask_min_ps(__W,__U,__A,__B); + return _mm256_mask_min_ps(__W,__U,__A,__B); } +TEST_CONSTEXPR(match_m256(_mm256_mask_min_ps((__m256){9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f}, (__mmask8)0b01010101, (__m256){1.0f, 4.0f, 2.0f, 5.0f, 1.0f, 4.0f, 2.0f, 5.0f}, (__m256){2.0f, 3.0f, 3.0f, 4.0f, 2.0f, 3.0f, 3.0f, 4.0f}), 1.0f, 9.0f, 2.0f, 9.0f, 1.0f, 9.0f, 2.0f, 9.0f)); __m256 test_mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) { // CHECK-LABEL: test_mm256_maskz_min_ps // CHECK: @llvm.x86.avx.min.ps.256 // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_maskz_min_ps(__U,__A,__B); + return 
_mm256_maskz_min_ps(__U,__A,__B); } +TEST_CONSTEXPR(match_m256(_mm256_maskz_min_ps((__mmask8)0b01010101, (__m256){1.0f, 4.0f, 2.0f, 5.0f, 1.0f, 4.0f, 2.0f, 5.0f}, (__m256){2.0f, 3.0f, 3.0f, 4.0f, 2.0f, 3.0f, 3.0f, 4.0f}), 1.0f, 0.0f, 2.0f, 0.0f, 1.0f, 0.0f, 2.0f, 0.0f)); __m128d test_mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { // CHECK-LABEL: test_mm_mask_mul_pd // CHECK: fmul <2 x double> %{{.*}}, %{{.*}} diff --git a/clang/test/CodeGen/X86/avx512vlfp16-builtins.c b/clang/test/CodeGen/X86/avx512vlfp16-builtins.c index 68d09849a317b..4aec29838f4a6 100644 --- a/clang/test/CodeGen/X86/avx512vlfp16-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlfp16-builtins.c @@ -355,72 +355,88 @@ __m256h test_mm256_min_ph(__m256h __A, __m256h __B) { // CHECK: @llvm.x86.avx512fp16.min.ph.256 return _mm256_min_ph(__A, __B); } +TEST_CONSTEXPR(match_m256h(_mm256_min_ph((__m256h){1.0f16, 2.0f16, 3.0f16, 4.0f16, 5.0f16, 6.0f16, 7.0f16, 8.0f16, 9.0f16, 10.0f16, 11.0f16, 12.0f16, 13.0f16, 14.0f16, 15.0f16, 16.0f16}, (__m256h){16.0f16, 15.0f16, 14.0f16, 13.0f16, 12.0f16, 11.0f16, 10.0f16, 9.0f16, 8.0f16, 7.0f16, 6.0f16, 5.0f16, 4.0f16, 3.0f16, 2.0f16, 1.0f16}), 1.0f16, 2.0f16, 3.0f16, 4.0f16, 5.0f16, 6.0f16, 7.0f16, 8.0f16, 8.0f16, 7.0f16, 6.0f16, 5.0f16, 4.0f16, 3.0f16, 2.0f16, 1.0f16)); +TEST_CONSTEXPR(match_m256h(_mm256_min_ph((__m256h){+0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16}, (__m256h){-0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16}), -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16)); __m256h test_mm256_mask_min_ph(__m256h __W, __mmask32 __U, __m256h __A, __m256h __B) { // CHECK-LABEL: test_mm256_mask_min_ph // CHECK: @llvm.x86.avx512fp16.min.ph.256 return 
(__m256h)_mm256_mask_min_ph(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_m256h(_mm256_mask_min_ph((__m256h){9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16}, (__mmask16)0b0101010101010101, (__m256h){1.0f16, 4.0f16, 2.0f16, 5.0f16, 1.0f16, 4.0f16, 2.0f16, 5.0f16, 1.0f16, 4.0f16, 2.0f16, 5.0f16, 1.0f16, 4.0f16, 2.0f16, 5.0f16}, (__m256h){2.0f16, 3.0f16, 3.0f16, 4.0f16, 2.0f16, 3.0f16, 3.0f16, 4.0f16, 2.0f16, 3.0f16, 3.0f16, 4.0f16, 2.0f16, 3.0f16, 3.0f16, 4.0f16}), 1.0f16, 9.0f16, 2.0f16, 9.0f16, 1.0f16, 9.0f16, 2.0f16, 9.0f16, 1.0f16, 9.0f16, 2.0f16, 9.0f16, 1.0f16, 9.0f16, 2.0f16, 9.0f16)); __m256h test_mm256_maskz_min_ph(__mmask32 __U, __m256h __A, __m256h __B) { // CHECK-LABEL: test_mm256_maskz_min_ph // CHECK: @llvm.x86.avx512fp16.min.ph.256 return _mm256_maskz_min_ph(__U, __A, __B); } +TEST_CONSTEXPR(match_m256h(_mm256_maskz_min_ph((__mmask16)0b0101010101010101, (__m256h){1.0f16, 4.0f16, 2.0f16, 5.0f16, 1.0f16, 4.0f16, 2.0f16, 5.0f16, 1.0f16, 4.0f16, 2.0f16, 5.0f16, 1.0f16, 4.0f16, 2.0f16, 5.0f16}, (__m256h){2.0f16, 3.0f16, 3.0f16, 4.0f16, 2.0f16, 3.0f16, 3.0f16, 4.0f16, 2.0f16, 3.0f16, 3.0f16, 4.0f16, 2.0f16, 3.0f16, 3.0f16, 4.0f16}), 1.0f16, 0.0f16, 2.0f16, 0.0f16, 1.0f16, 0.0f16, 2.0f16, 0.0f16, 1.0f16, 0.0f16, 2.0f16, 0.0f16, 1.0f16, 0.0f16, 2.0f16, 0.0f16)); __m128h test_mm_min_ph(__m128h __A, __m128h __B) { // CHECK-LABEL: test_mm_min_ph // CHECK: @llvm.x86.avx512fp16.min.ph.128 return _mm_min_ph(__A, __B); } +TEST_CONSTEXPR(match_m128h(_mm_min_ph((__m128h){1.0f16, 2.0f16, 3.0f16, 4.0f16, 5.0f16, 6.0f16, 7.0f16, 8.0f16}, (__m128h){8.0f16, 7.0f16, 6.0f16, 5.0f16, 4.0f16, 3.0f16, 2.0f16, 1.0f16}), 1.0f16, 2.0f16, 3.0f16, 4.0f16, 4.0f16, 3.0f16, 2.0f16, 1.0f16)); +TEST_CONSTEXPR(match_m128h(_mm_min_ph((__m128h){+0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16}, (__m128h){-0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16}), -0.0f16, 
+0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16)); __m128h test_mm_mask_min_ph(__m128h __W, __mmask32 __U, __m128h __A, __m128h __B) { // CHECK-LABEL: test_mm_mask_min_ph // CHECK: @llvm.x86.avx512fp16.min.ph.128 return (__m128h)_mm_mask_min_ph(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_m128h(_mm_mask_min_ph((__m128h){9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16}, (__mmask8)0b01010101, (__m128h){1.0f16, 4.0f16, 2.0f16, 5.0f16, 1.0f16, 4.0f16, 2.0f16, 5.0f16}, (__m128h){2.0f16, 3.0f16, 3.0f16, 4.0f16, 2.0f16, 3.0f16, 3.0f16, 4.0f16}), 1.0f16, 9.0f16, 2.0f16, 9.0f16, 1.0f16, 9.0f16, 2.0f16, 9.0f16)); __m128h test_mm_maskz_min_ph(__mmask32 __U, __m128h __A, __m128h __B) { // CHECK-LABEL: test_mm_maskz_min_ph // CHECK: @llvm.x86.avx512fp16.min.ph.128 return _mm_maskz_min_ph(__U, __A, __B); } +TEST_CONSTEXPR(match_m128h(_mm_maskz_min_ph((__mmask8)0b01010101, (__m128h){1.0f16, 4.0f16, 2.0f16, 5.0f16, 1.0f16, 4.0f16, 2.0f16, 5.0f16}, (__m128h){2.0f16, 3.0f16, 3.0f16, 4.0f16, 2.0f16, 3.0f16, 3.0f16, 4.0f16}), 1.0f16, 0.0f16, 2.0f16, 0.0f16, 1.0f16, 0.0f16, 2.0f16, 0.0f16)); __m256h test_mm256_max_ph(__m256h __A, __m256h __B) { // CHECK-LABEL: test_mm256_max_ph // CHECK: @llvm.x86.avx512fp16.max.ph.256 return _mm256_max_ph(__A, __B); } +TEST_CONSTEXPR(match_m256h(_mm256_max_ph((__m256h){1.0f16, 2.0f16, 3.0f16, 4.0f16, 5.0f16, 6.0f16, 7.0f16, 8.0f16, 9.0f16, 10.0f16, 11.0f16, 12.0f16, 13.0f16, 14.0f16, 15.0f16, 16.0f16}, (__m256h){16.0f16, 15.0f16, 14.0f16, 13.0f16, 12.0f16, 11.0f16, 10.0f16, 9.0f16, 8.0f16, 7.0f16, 6.0f16, 5.0f16, 4.0f16, 3.0f16, 2.0f16, 1.0f16}), 16.0f16, 15.0f16, 14.0f16, 13.0f16, 12.0f16, 11.0f16, 10.0f16, 9.0f16, 9.0f16, 10.0f16, 11.0f16, 12.0f16, 13.0f16, 14.0f16, 15.0f16, 16.0f16)); +TEST_CONSTEXPR(match_m256h(_mm256_max_ph((__m256h){+0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16}, (__m256h){-0.0f16, +0.0f16, -0.0f16, 
+0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16}), -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16)); __m256h test_mm256_mask_max_ph(__m256h __W, __mmask32 __U, __m256h __A, __m256h __B) { // CHECK-LABEL: test_mm256_mask_max_ph // CHECK: @llvm.x86.avx512fp16.max.ph.256 return (__m256h)_mm256_mask_max_ph(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_m256h(_mm256_mask_max_ph((__m256h){9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16}, (__mmask16)0b0101010101010101, (__m256h){1.0f16, 4.0f16, 2.0f16, 5.0f16, 1.0f16, 4.0f16, 2.0f16, 5.0f16, 1.0f16, 4.0f16, 2.0f16, 5.0f16, 1.0f16, 4.0f16, 2.0f16, 5.0f16}, (__m256h){2.0f16, 3.0f16, 3.0f16, 4.0f16, 2.0f16, 3.0f16, 3.0f16, 4.0f16, 2.0f16, 3.0f16, 3.0f16, 4.0f16, 2.0f16, 3.0f16, 3.0f16, 4.0f16}), 2.0f16, 9.0f16, 3.0f16, 9.0f16, 2.0f16, 9.0f16, 3.0f16, 9.0f16, 2.0f16, 9.0f16, 3.0f16, 9.0f16, 2.0f16, 9.0f16, 3.0f16, 9.0f16)); __m256h test_mm256_maskz_max_ph(__mmask32 __U, __m256h __A, __m256h __B) { // CHECK-LABEL: test_mm256_maskz_max_ph // CHECK: @llvm.x86.avx512fp16.max.ph.256 return _mm256_maskz_max_ph(__U, __A, __B); } +TEST_CONSTEXPR(match_m256h(_mm256_maskz_max_ph((__mmask16)0b0101010101010101, (__m256h){1.0f16, 4.0f16, 2.0f16, 5.0f16, 1.0f16, 4.0f16, 2.0f16, 5.0f16, 1.0f16, 4.0f16, 2.0f16, 5.0f16, 1.0f16, 4.0f16, 2.0f16, 5.0f16}, (__m256h){2.0f16, 3.0f16, 3.0f16, 4.0f16, 2.0f16, 3.0f16, 3.0f16, 4.0f16, 2.0f16, 3.0f16, 3.0f16, 4.0f16, 2.0f16, 3.0f16, 3.0f16, 4.0f16}), 2.0f16, 0.0f16, 3.0f16, 0.0f16, 2.0f16, 0.0f16, 3.0f16, 0.0f16, 2.0f16, 0.0f16, 3.0f16, 0.0f16, 2.0f16, 0.0f16, 3.0f16, 0.0f16)); __m128h test_mm_max_ph(__m128h __A, __m128h __B) { // CHECK-LABEL: test_mm_max_ph // CHECK: @llvm.x86.avx512fp16.max.ph.128 return _mm_max_ph(__A, __B); } 
+TEST_CONSTEXPR(match_m128h(_mm_max_ph((__m128h){1.0f16, 2.0f16, 3.0f16, 4.0f16, 5.0f16, 6.0f16, 7.0f16, 8.0f16}, (__m128h){8.0f16, 7.0f16, 6.0f16, 5.0f16, 4.0f16, 3.0f16, 2.0f16, 1.0f16}), 8.0f16, 7.0f16, 6.0f16, 5.0f16, 5.0f16, 6.0f16, 7.0f16, 8.0f16)); +TEST_CONSTEXPR(match_m128h(_mm_max_ph((__m128h){+0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16}, (__m128h){-0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16}), -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16, -0.0f16, +0.0f16)); __m128h test_mm_mask_max_ph(__m128h __W, __mmask32 __U, __m128h __A, __m128h __B) { // CHECK-LABEL: test_mm_mask_max_ph // CHECK: @llvm.x86.avx512fp16.max.ph.128 return (__m128h)_mm_mask_max_ph(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_m128h(_mm_mask_max_ph((__m128h){9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16, 9.0f16}, (__mmask8)0b01010101, (__m128h){1.0f16, 4.0f16, 2.0f16, 5.0f16, 1.0f16, 4.0f16, 2.0f16, 5.0f16}, (__m128h){2.0f16, 3.0f16, 3.0f16, 4.0f16, 2.0f16, 3.0f16, 3.0f16, 4.0f16}), 2.0f16, 9.0f16, 3.0f16, 9.0f16, 2.0f16, 9.0f16, 3.0f16, 9.0f16)); __m128h test_mm_maskz_max_ph(__mmask32 __U, __m128h __A, __m128h __B) { // CHECK-LABEL: test_mm_maskz_max_ph // CHECK: @llvm.x86.avx512fp16.max.ph.128 return _mm_maskz_max_ph(__U, __A, __B); } +TEST_CONSTEXPR(match_m128h(_mm_maskz_max_ph((__mmask8)0b01010101, (__m128h){1.0f16, 4.0f16, 2.0f16, 5.0f16, 1.0f16, 4.0f16, 2.0f16, 5.0f16}, (__m128h){2.0f16, 3.0f16, 3.0f16, 4.0f16, 2.0f16, 3.0f16, 3.0f16, 4.0f16}), 2.0f16, 0.0f16, 3.0f16, 0.0f16, 2.0f16, 0.0f16, 3.0f16, 0.0f16)); __m128h test_mm_abs_ph(__m128h a) { // CHECK-LABEL: test_mm_abs_ph diff --git a/clang/test/CodeGen/X86/sse-builtins.c b/clang/test/CodeGen/X86/sse-builtins.c index fd4775739fad8..edd9f00bae2b2 100644 --- a/clang/test/CodeGen/X86/sse-builtins.c +++ b/clang/test/CodeGen/X86/sse-builtins.c @@ -515,6 +515,9 @@ __m128 test_mm_max_ps(__m128 A, __m128 B) { // CHECK: @llvm.x86.sse.max.ps(<4 x float> %{{.*}}, <4 x 
float> %{{.*}}) return _mm_max_ps(A, B); } +TEST_CONSTEXPR(match_m128(_mm_max_ps((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+4.0f, +3.0f, +2.0f, +1.0f}), +4.0f, +3.0f, +3.0f, +4.0f)); +TEST_CONSTEXPR(match_m128(_mm_max_ps((__m128){+0.0f, -0.0f, +0.0f, -0.0f}, (__m128){-0.0f, +0.0f, -0.0f, +0.0f}), -0.0f, +0.0f, -0.0f, +0.0f)); +TEST_CONSTEXPR(match_m128(_mm_max_ps((__m128){-1.0f, -2.0f, +3.0f, -4.0f}, (__m128){+1.0f, -3.0f, +2.0f, -5.0f}), +1.0f, -2.0f, +3.0f, -4.0f)); __m128 test_mm_max_ss(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_max_ss @@ -527,6 +530,9 @@ __m128 test_mm_min_ps(__m128 A, __m128 B) { // CHECK: @llvm.x86.sse.min.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}) return _mm_min_ps(A, B); } +TEST_CONSTEXPR(match_m128(_mm_min_ps((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+4.0f, +3.0f, +2.0f, +1.0f}), +1.0f, +2.0f, +2.0f, +1.0f)); +TEST_CONSTEXPR(match_m128(_mm_min_ps((__m128){+0.0f, -0.0f, +0.0f, -0.0f}, (__m128){-0.0f, +0.0f, -0.0f, +0.0f}), -0.0f, +0.0f, -0.0f, +0.0f)); +TEST_CONSTEXPR(match_m128(_mm_min_ps((__m128){-1.0f, -2.0f, +3.0f, -4.0f}, (__m128){+1.0f, -3.0f, +2.0f, -5.0f}), -1.0f, -3.0f, +2.0f, -5.0f)); __m128 test_mm_min_ss(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_min_ss diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index c4975b456ba22..ab0a857b926f3 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -885,6 +885,9 @@ __m128d test_mm_max_pd(__m128d A, __m128d B) { // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_max_pd(A, B); } +TEST_CONSTEXPR(match_m128d(_mm_max_pd((__m128d){+1.0, +2.0}, (__m128d){+4.0, +1.0}), +4.0, +2.0)); +TEST_CONSTEXPR(match_m128d(_mm_max_pd((__m128d){+0.0, -0.0}, (__m128d){-0.0, +0.0}), -0.0, +0.0)); +TEST_CONSTEXPR(match_m128d(_mm_max_pd((__m128d){-1.0, +3.0}, (__m128d){+1.0, +2.0}), +1.0, +3.0)); __m128d test_mm_max_sd(__m128d A, __m128d B) { // 
CHECK-LABEL: test_mm_max_sd @@ -919,6 +922,9 @@ __m128d test_mm_min_pd(__m128d A, __m128d B) { // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_min_pd(A, B); } +TEST_CONSTEXPR(match_m128d(_mm_min_pd((__m128d){+1.0, +2.0}, (__m128d){+4.0, +1.0}), +1.0, +1.0)); +TEST_CONSTEXPR(match_m128d(_mm_min_pd((__m128d){+0.0, -0.0}, (__m128d){-0.0, +0.0}), -0.0, +0.0)); +TEST_CONSTEXPR(match_m128d(_mm_min_pd((__m128d){-1.0, +3.0}, (__m128d){+1.0, +2.0}), -1.0, +2.0)); __m128d test_mm_min_sd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_min_sd _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
