https://github.com/kimsh02 updated https://github.com/llvm/llvm-project/pull/158703
>From 1b81bf79bc7e6a2c1df846b27e4a95cf1cb97090 Mon Sep 17 00:00:00 2001 From: kimsh02 <kimshaw...@icloud.com> Date: Mon, 15 Sep 2025 10:58:34 -0700 Subject: [PATCH 01/14] [Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - add AVX512 VPTERNLOGD/VPTERNLOGQ intrinsics to be used in constexpr --- clang/include/clang/Basic/BuiltinsX86.td | 16 +- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 64 ++++- clang/lib/AST/ExprConstant.cpp | 91 +++++++ clang/test/CodeGen/X86/avx512f-builtins.c | 138 ++++++++++ clang/test/CodeGen/X86/avx512vl-builtins.c | 277 +++++++++++++++++++++ 5 files changed, 580 insertions(+), 6 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 044c755d4d7cf..b411e583b1a2c 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -2402,28 +2402,36 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512> def psraq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<2, long long int>)">; def psrld512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<4, int>)">; def psrlq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<2, long long int>)">; +} + +let Features = "avx512f", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def pternlogd512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">; def pternlogd512_maskz : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">; def pternlogq512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">; def pternlogq512_maskz : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def pternlogd128_mask : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>, _Constant int, unsigned char)">; def pternlogd128_maskz : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>, _Constant int, unsigned char)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def pternlogd256_mask : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>, _Constant int, unsigned char)">; def pternlogd256_maskz : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>, _Constant int, unsigned char)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def pternlogq128_mask : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>, _Constant int, unsigned char)">; def pternlogq128_maskz : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>, _Constant int, unsigned char)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def pternlogq256_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">; def pternlogq256_maskz : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 77729a5d67c87..00466aa7ae663 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2952,10 +2952,57 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC, }); Dst.initializeAllElements(); - return true; } +static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC, + const CallExpr *Call, bool MaskZ) { + assert(Call->getNumArgs() == 5); + + const VectorType *VecT = Call->getArg(0)->getType()->castAs<VectorType>(); + unsigned DstLen = VecT->getNumElements(); + PrimType DstElemT = *S.getContext().classify(VecT->getElementType()); + + APSInt U = popToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(4))); + APSInt Imm = popToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(3))); + const Pointer &C = S.Stk.pop<Pointer>(); + const Pointer &B = S.Stk.pop<Pointer>(); + const Pointer &A = S.Stk.pop<Pointer>(); + + const Pointer &Dst = S.Stk.peek<Pointer>(); + + for (unsigned I = 0; I < DstLen; ++I) { + APSInt a, b, c; + INT_TYPE_SWITCH(DstElemT, { + a = A.elem<T>(I).toAPSInt(); + b = B.elem<T>(I).toAPSInt(); + c = C.elem<T>(I).toAPSInt(); + }); + + unsigned BitWidth = a.getBitWidth(); + APInt R(BitWidth, 0); + bool DstUnsigned = a.isUnsigned(); + + if (U[I]) { + for (unsigned Bit = 0; Bit < BitWidth; ++Bit) { + unsigned Idx = (a[Bit] << 2) | (b[Bit] << 1) | (c[Bit]); + R.setBitVal(Bit, Imm[Idx]); + } + INT_TYPE_SWITCH_NO_BOOL(DstElemT, { + Dst.elem<T>(I) = static_cast<T>(APSInt(R, DstUnsigned)); + }); + } else if (MaskZ) { + INT_TYPE_SWITCH_NO_BOOL(DstElemT, { + Dst.elem<T>(I) = static_cast<T>(APSInt(R, DstUnsigned)); + }); + } else { + INT_TYPE_SWITCH_NO_BOOL(DstElemT, + { Dst.elem<T>(I) = static_cast<T>(a); }); + } + } + Dst.initializeAllElements(); +} + bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, uint32_t BuiltinID) { if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID)) @@ -3606,7 +3653,20 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_selectpd_256: case X86::BI__builtin_ia32_selectpd_512: return interp__builtin_select(S, OpPC, Call); - + case X86::BI__builtin_ia32_pternlogd128_mask: + case X86::BI__builtin_ia32_pternlogd256_mask: + case X86::BI__builtin_ia32_pternlogd512_mask: + case X86::BI__builtin_ia32_pternlogq128_mask: + case X86::BI__builtin_ia32_pternlogq256_mask: + case X86::BI__builtin_ia32_pternlogq512_mask: + return interp__builtin_pternlog(S, OpPC, Call, false); + case X86::BI__builtin_ia32_pternlogd128_maskz: + case X86::BI__builtin_ia32_pternlogd256_maskz: + case X86::BI__builtin_ia32_pternlogd512_maskz: + case X86::BI__builtin_ia32_pternlogq128_maskz: + case X86::BI__builtin_ia32_pternlogq256_maskz: + case X86::BI__builtin_ia32_pternlogq512_maskz: + return interp__builtin_pternlog(S, OpPC, Call, true); case Builtin::BI__builtin_elementwise_fshl: return interp__builtin_elementwise_triop(S, OpPC, Call, llvm::APIntOps::fshl); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 3b9ca82910033..db725ba935b99 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12025,6 +12025,97 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case X86::BI__builtin_ia32_pternlogd128_mask: + case X86::BI__builtin_ia32_pternlogd256_mask: + case X86::BI__builtin_ia32_pternlogd512_mask: + case X86::BI__builtin_ia32_pternlogq128_mask: + case X86::BI__builtin_ia32_pternlogq256_mask: + case X86::BI__builtin_ia32_pternlogq512_mask: { + APValue AValue, BValue, CValue, ImmValue, UValue; + if (!EvaluateAsRValue(Info, E->getArg(0), AValue) || + !EvaluateAsRValue(Info, E->getArg(1), BValue) || + !EvaluateAsRValue(Info, E->getArg(2), CValue) || + !EvaluateAsRValue(Info, E->getArg(3), ImmValue) || + !EvaluateAsRValue(Info, E->getArg(4), UValue)) + return false; + + QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType(); + bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType(); + APInt Imm = ImmValue.getInt(); + APInt U = UValue.getInt(); + unsigned ResultLen = AValue.getVectorLength(); + SmallVector<APValue, 16> ResultElements; + ResultElements.reserve(ResultLen); + + for (unsigned EltNum = 0; EltNum < ResultLen; ++EltNum) { + APInt ALane = AValue.getVectorElt(EltNum).getInt(); + APInt BLane = BValue.getVectorElt(EltNum).getInt(); + APInt CLane = CValue.getVectorElt(EltNum).getInt(); + + if (U[EltNum]) { + unsigned BitWidth = ALane.getBitWidth(); + APInt ResLane(BitWidth, 0); + + for (unsigned Bit = 0; Bit < BitWidth; ++Bit) { + unsigned ABit = ALane[Bit]; + unsigned BBit = BLane[Bit]; + unsigned CBit = CLane[Bit]; + + unsigned Idx = (ABit << 2) | (BBit << 1) | CBit; + ResLane.setBitVal(Bit, Imm[Idx]); + } + ResultElements.push_back(APValue(APSInt(ResLane, DestUnsigned))); + } else { + ResultElements.push_back(APValue(APSInt(ALane, DestUnsigned))); + } + } + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } + case X86::BI__builtin_ia32_pternlogd128_maskz: + case X86::BI__builtin_ia32_pternlogd256_maskz: + case X86::BI__builtin_ia32_pternlogd512_maskz: + case X86::BI__builtin_ia32_pternlogq128_maskz: + case X86::BI__builtin_ia32_pternlogq256_maskz: + case X86::BI__builtin_ia32_pternlogq512_maskz: { + APValue AValue, BValue, CValue, ImmValue, UValue; + if (!EvaluateAsRValue(Info, E->getArg(0), AValue) || + !EvaluateAsRValue(Info, E->getArg(1), BValue) || + !EvaluateAsRValue(Info, E->getArg(2), CValue) || + !EvaluateAsRValue(Info, E->getArg(3), ImmValue) || + !EvaluateAsRValue(Info, E->getArg(4), UValue)) + return false; + + QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType(); + bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType(); + APInt Imm = ImmValue.getInt(); + APInt U = UValue.getInt(); + unsigned ResultLen = AValue.getVectorLength(); + SmallVector<APValue, 16> ResultElements; + ResultElements.reserve(ResultLen); + + for (unsigned EltNum = 0; EltNum < ResultLen; ++EltNum) { + APInt ALane = AValue.getVectorElt(EltNum).getInt(); + APInt BLane = BValue.getVectorElt(EltNum).getInt(); + APInt CLane = CValue.getVectorElt(EltNum).getInt(); + + unsigned BitWidth = ALane.getBitWidth(); + APInt ResLane(BitWidth, 0); + + if (U[EltNum]) { + for (unsigned Bit = 0; Bit < BitWidth; ++Bit) { + unsigned ABit = ALane[Bit]; + unsigned BBit = BLane[Bit]; + unsigned CBit = CLane[Bit]; + + unsigned Idx = (ABit << 2) | (BBit << 1) | CBit; + ResLane.setBitVal(Bit, Imm[Idx]); + } + } + ResultElements.push_back(APValue(APSInt(ResLane, DestUnsigned))); + } + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } + case Builtin::BI__builtin_elementwise_clzg: case Builtin::BI__builtin_elementwise_ctzg: { APValue SourceLHS; diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index e25eb9846431d..9fa450c68e503 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -6246,6 +6246,27 @@ __m512i test_mm512_ternarylogic_epi32(__m512i __A, __m512i __B, __m512i __C) { // CHECK: @llvm.x86.avx512.pternlog.d.512({{.*}}, i32 240) return _mm512_ternarylogic_epi32(__A, __B, __C, _MM_TERNLOG_A); } +TEST_CONSTEXPR(match_v16si( + _mm512_ternarylogic_epi32( + ((__m512i)((__v16si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})), + ((__m512i)((__v16si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})), + ((__m512i)((__v16si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})), + (unsigned char)0xCA), + 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC)); +TEST_CONSTEXPR(match_v16si( + _mm512_ternarylogic_epi32( + ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0xFE), + 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF)); +TEST_CONSTEXPR(match_v16si( + _mm512_ternarylogic_epi32( + ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)); __m512i test_mm512_mask_ternarylogic_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { // CHECK-LABEL: test_mm512_mask_ternarylogic_epi32 @@ -6253,6 +6274,30 @@ __m512i test_mm512_mask_ternarylogic_epi32(__m512i __A, __mmask16 __U, __m512i _ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_ternarylogic_epi32(__A, __U, __B, __C, _MM_TERNLOG_B); } +TEST_CONSTEXPR(match_v16si( + _mm512_mask_ternarylogic_epi32( + ((__m512i)((__v16si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})), + (__mmask16)0x3333, + ((__m512i)((__v16si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})), + ((__m512i)((__v16si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})), + (unsigned char)0xCA), + 0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0)); +TEST_CONSTEXPR(match_v16si( + _mm512_mask_ternarylogic_epi32( + ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + (__mmask16)0xCCCC, + ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0xFE), + 0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF)); +TEST_CONSTEXPR(match_v16si( + _mm512_mask_ternarylogic_epi32( + ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + (__mmask16)0x5555, + ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9)); __m512i test_mm512_maskz_ternarylogic_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) { // CHECK-LABEL: test_mm512_maskz_ternarylogic_epi32 @@ -6260,12 +6305,57 @@ __m512i test_mm512_maskz_ternarylogic_epi32(__mmask16 __U, __m512i __A, __m512i // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> zeroinitializer return _mm512_maskz_ternarylogic_epi32(__U, __A, __B, __C, _MM_TERNLOG_C); } +TEST_CONSTEXPR(match_v16si( + _mm512_maskz_ternarylogic_epi32( + (__mmask16)0x3333, + ((__m512i)((__v16si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})), + ((__m512i)((__v16si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})), + ((__m512i)((__v16si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})), + (unsigned char)0xCA), + 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0)); +TEST_CONSTEXPR(match_v16si( + _mm512_maskz_ternarylogic_epi32( + (__mmask16)0xCCCC, + ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0xFE), + 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF)); +TEST_CONSTEXPR(match_v16si( + _mm512_maskz_ternarylogic_epi32( + (__mmask16)0x5555, + ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)); __m512i test_mm512_ternarylogic_epi64(__m512i __A, __m512i __B, __m512i __C) { // CHECK-LABEL: test_mm512_ternarylogic_epi64 // CHECK: @llvm.x86.avx512.pternlog.q.512({{.*}}, i32 192) return _mm512_ternarylogic_epi64(__A, __B, __C, _MM_TERNLOG_A & _MM_TERNLOG_B); } +TEST_CONSTEXPR(match_v8di( + _mm512_ternarylogic_epi64( + ((__m512i)((__v8di){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})), + ((__m512i)((__v8di){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})), + ((__m512i)((__v8di){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})), + (unsigned char)0xCA), + 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC)); +TEST_CONSTEXPR(match_v8di( + _mm512_ternarylogic_epi64( + ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0xFE), + 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF)); +TEST_CONSTEXPR(match_v8di( + _mm512_ternarylogic_epi64( + ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)); __m512i test_mm512_mask_ternarylogic_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) { // CHECK-LABEL: test_mm512_mask_ternarylogic_epi64 @@ -6273,6 +6363,30 @@ __m512i test_mm512_mask_ternarylogic_epi64(__m512i __A, __mmask8 __U, __m512i __ // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_ternarylogic_epi64(__A, __U, __B, __C, _MM_TERNLOG_B | _MM_TERNLOG_C); } +TEST_CONSTEXPR(match_v8di( + _mm512_mask_ternarylogic_epi64( + ((__m512i)((__v8di){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})), + (__mmask8)0x33, + ((__m512i)((__v8di){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})), + ((__m512i)((__v8di){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})), + (unsigned char)0xCA), + 0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0)); +TEST_CONSTEXPR(match_v8di( + _mm512_mask_ternarylogic_epi64( + ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + (__mmask8)0xCC, + ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0xFE), + 0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF)); +TEST_CONSTEXPR(match_v8di( + _mm512_mask_ternarylogic_epi64( + ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + (__mmask8)0x55, + ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9)); __m512i test_mm512_maskz_ternarylogic_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) { // CHECK-LABEL: test_mm512_maskz_ternarylogic_epi64 @@ -6280,6 +6394,30 @@ __m512i test_mm512_maskz_ternarylogic_epi64(__mmask8 __U, __m512i __A, __m512i _ // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> zeroinitializer return _mm512_maskz_ternarylogic_epi64(__U, __A, __B, __C, ~_MM_TERNLOG_A | (_MM_TERNLOG_B ^ _MM_TERNLOG_C)); } +TEST_CONSTEXPR(match_v8di( + _mm512_maskz_ternarylogic_epi64( + (__mmask8)0x33, + ((__m512i)((__v8di){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})), + ((__m512i)((__v8di){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})), + ((__m512i)((__v8di){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})), + (unsigned char)0xCA), + 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0)); +TEST_CONSTEXPR(match_v8di( + _mm512_maskz_ternarylogic_epi64( + (__mmask8)0xCC, + ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0xFE), + 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF)); +TEST_CONSTEXPR(match_v8di( + _mm512_maskz_ternarylogic_epi64( + (__mmask8)0x55, + ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)); __m512 test_mm512_shuffle_f32x4(__m512 __A, __m512 __B) { // CHECK-LABEL: test_mm512_shuffle_f32x4 diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index 465c43dff8493..d29045af6e303 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -8328,6 +8328,27 @@ __m128i test_mm_ternarylogic_epi32(__m128i __A, __m128i __B, __m128i __C) { // CHECK: @llvm.x86.avx512.pternlog.d.128 return _mm_ternarylogic_epi32(__A, __B, __C, 4); } +TEST_CONSTEXPR(match_v4si( + _mm_ternarylogic_epi32( + ((__m128i)((__v4si){-0x1, 0x0, -0x1, 0x0})), + ((__m128i)((__v4si){0xB, 0xB, 0xB, 0xB})), + ((__m128i)((__v4si){0xC, 0xC, 0xC, 0xC })), + (unsigned char)0xCA), + 0xB, 0xC, 0xB, 0xC)); +TEST_CONSTEXPR(match_v4si( + _mm_ternarylogic_epi32( + ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})), + ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})), + ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})), + (unsigned char)0xFE), + 0xF, 0xF, 0xF, 0xF)); +TEST_CONSTEXPR(match_v4si( + _mm_ternarylogic_epi32( + ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})), + ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})), + ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x0, 0x0, 0x0)); __m128i test_mm_mask_ternarylogic_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { // CHECK-LABEL: test_mm_mask_ternarylogic_epi32 @@ -8335,6 +8356,30 @@ __m128i test_mm_mask_ternarylogic_epi32(__m128i __A, __mmask8 __U, __m128i __B, // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_ternarylogic_epi32(__A, __U, __B, __C, 4); } +TEST_CONSTEXPR(match_v4si( + _mm_mask_ternarylogic_epi32( + ((__m128i)((__v4si){-0x1, 0x0, -0x1, 0x0})), + (__mmask8)0x03, + ((__m128i)((__v4si){0xB, 0xB, 0xB, 0xB})), + ((__m128i)((__v4si){0xC, 0xC, 0xC, 0xC })), + (unsigned char)0xCA), + 0xB, 0xC, -0x1, 0x0)); +TEST_CONSTEXPR(match_v4si( + _mm_mask_ternarylogic_epi32( + ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})), + (__mmask8)0x0C, + ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})), + ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})), + (unsigned char)0xFE), + 0x9, 0x9, 0xF, 0xF)); +TEST_CONSTEXPR(match_v4si( + _mm_mask_ternarylogic_epi32( + ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})), + (__mmask8)0x05, + ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})), + ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x9, 0x0, 0x9)); __m128i test_mm_maskz_ternarylogic_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { // CHECK-LABEL: test_mm_maskz_ternarylogic_epi32 @@ -8342,12 +8387,57 @@ __m128i test_mm_maskz_ternarylogic_epi32(__mmask8 __U, __m128i __A, __m128i __B, // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> zeroinitializer return _mm_maskz_ternarylogic_epi32(__U, __A, __B, __C, 4); } +TEST_CONSTEXPR(match_v4si( + _mm_maskz_ternarylogic_epi32( + (__mmask8)0x03, + ((__m128i)((__v4si){-0x1, 0x0, -0x1, 0x0})), + ((__m128i)((__v4si){0xB, 0xB, 0xB, 0xB})), + ((__m128i)((__v4si){0xC, 0xC, 0xC, 0xC })), + (unsigned char)0xCA), + 0xB, 0xC, 0x0, 0x0)); +TEST_CONSTEXPR(match_v4si( + _mm_maskz_ternarylogic_epi32( + (__mmask8)0x0C, + ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})), + ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})), + ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})), + (unsigned char)0xFE), + 0x0, 0x0, 0xF, 0xF)); +TEST_CONSTEXPR(match_v4si( + _mm_maskz_ternarylogic_epi32( + (__mmask8)0x05, + ((__m128i)((__v4si){0x9, 0x9, 0x9, 0x9})), + ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})), + ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x0, 0x0, 0x0)); __m256i test_mm256_ternarylogic_epi32(__m256i __A, __m256i __B, __m256i __C) { // CHECK-LABEL: test_mm256_ternarylogic_epi32 // CHECK: @llvm.x86.avx512.pternlog.d.256 return _mm256_ternarylogic_epi32(__A, __B, __C, 4); } +TEST_CONSTEXPR(match_v8si( + _mm256_ternarylogic_epi32( + ((__m256i)((__v8si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})), + ((__m256i)((__v8si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})), + ((__m256i)((__v8si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})), + (unsigned char)0xCA), + 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC)); +TEST_CONSTEXPR(match_v8si( + _mm256_ternarylogic_epi32( + ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0xFE), + 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF)); +TEST_CONSTEXPR(match_v8si( + _mm256_ternarylogic_epi32( + ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)); __m256i test_mm256_mask_ternarylogic_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { // CHECK-LABEL: test_mm256_mask_ternarylogic_epi32 @@ -8355,6 +8445,30 @@ __m256i test_mm256_mask_ternarylogic_epi32(__m256i __A, __mmask8 __U, __m256i __ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_ternarylogic_epi32(__A, __U, __B, __C, 4); } +TEST_CONSTEXPR(match_v8si( + _mm256_mask_ternarylogic_epi32( + ((__m256i)((__v8si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})), + (__mmask8)0x33, + ((__m256i)((__v8si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})), + ((__m256i)((__v8si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})), + (unsigned char)0xCA), + 0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0)); +TEST_CONSTEXPR(match_v8si( + _mm256_mask_ternarylogic_epi32( + ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + (__mmask8)0xCC, + ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0xFE), + 0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF)); +TEST_CONSTEXPR(match_v8si( + _mm256_mask_ternarylogic_epi32( + ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + (__mmask8)0x55, + ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9)); __m256i test_mm256_maskz_ternarylogic_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { // CHECK-LABEL: test_mm256_maskz_ternarylogic_epi32 @@ -8362,12 +8476,57 @@ __m256i test_mm256_maskz_ternarylogic_epi32(__mmask8 __U, __m256i __A, __m256i _ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> zeroinitializer return _mm256_maskz_ternarylogic_epi32(__U, __A, __B, __C, 4); } +TEST_CONSTEXPR(match_v8si( + _mm256_maskz_ternarylogic_epi32( + (__mmask8)0x33, + ((__m256i)((__v8si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})), + ((__m256i)((__v8si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})), + ((__m256i)((__v8si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})), + (unsigned char)0xCA), + 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0)); +TEST_CONSTEXPR(match_v8si( + _mm256_maskz_ternarylogic_epi32( + (__mmask8)0xCC, + ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0xFE), + 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF)); +TEST_CONSTEXPR(match_v8si( + _mm256_maskz_ternarylogic_epi32( + (__mmask8)0x55, + ((__m256i)((__v8si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), + ((__m256i)((__v8si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), + ((__m256i)((__v8si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)); __m128i test_mm_ternarylogic_epi64(__m128i __A, __m128i __B, __m128i __C) { // CHECK-LABEL: test_mm_ternarylogic_epi64 // CHECK: @llvm.x86.avx512.pternlog.q.128 return _mm_ternarylogic_epi64(__A, __B, __C, 4); } +TEST_CONSTEXPR(match_v2di( + _mm_ternarylogic_epi64( + ((__m128i)((__v2di){-0x1, 0x0})), + ((__m128i)((__v2di){0xB, 0xB})), + ((__m128i)((__v2di){0xC, 0xC})), + (unsigned char)0xCA), + 0xB, 0xC)); +TEST_CONSTEXPR(match_v2di( + _mm_ternarylogic_epi64( + ((__m128i)((__v2di){0x9, 0x9})), + ((__m128i)((__v2di){0x4, 0x4})), + ((__m128i)((__v2di){0x2, 0x2})), + (unsigned char)0xFE), + 0xF, 0xF)); +TEST_CONSTEXPR(match_v2di( + _mm_ternarylogic_epi64( + ((__m128i)((__v2di){0x9, 0x9})), + ((__m128i)((__v2di){0x4, 0x4})), + ((__m128i)((__v2di){0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x0)); __m128i test_mm_mask_ternarylogic_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { // CHECK-LABEL: test_mm_mask_ternarylogic_epi64 @@ -8375,6 +8534,30 @@ __m128i test_mm_mask_ternarylogic_epi64(__m128i __A, __mmask8 __U, __m128i __B, // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_ternarylogic_epi64(__A, __U, __B, __C, 4); } +TEST_CONSTEXPR(match_v2di( + _mm_mask_ternarylogic_epi64( + ((__m128i)((__v2di){-0x1, 0x0})), + (__mmask8)0x33, + ((__m128i)((__v2di){0xB, 0xB})), + ((__m128i)((__v2di){0xC, 0xC})), + (unsigned char)0xCA), + 0xB, 0xC)); +TEST_CONSTEXPR(match_v2di( + _mm_mask_ternarylogic_epi64( + ((__m128i)((__v2di){0x9, 0x9})), + (__mmask8)0xCC, + ((__m128i)((__v2di){0x4, 0x4})), + ((__m128i)((__v2di){0x2, 0x2})), + (unsigned char)0xFE), + 0x9, 0x9)); +TEST_CONSTEXPR(match_v2di( + _mm_mask_ternarylogic_epi64( + ((__m128i)((__v2di){0x9, 0x9})), + (__mmask8)0x55, + ((__m128i)((__v2di){0x4, 0x4})), + ((__m128i)((__v2di){0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x9)); __m128i test_mm_maskz_ternarylogic_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { // CHECK-LABEL: test_mm_maskz_ternarylogic_epi64 @@ -8382,12 +8565,57 @@ __m128i test_mm_maskz_ternarylogic_epi64(__mmask8 __U, __m128i __A, __m128i __B, // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> zeroinitializer return _mm_maskz_ternarylogic_epi64(__U, __A, __B, __C, 4); } +TEST_CONSTEXPR(match_v2di( + _mm_maskz_ternarylogic_epi64( + (__mmask8)0x03, + ((__m128i)((__v2di){-0x1, 0x0})), + ((__m128i)((__v2di){0xB, 0xB})), + ((__m128i)((__v2di){0xC, 0xC})), + (unsigned char)0xCA), + 0xB, 0xC)); +TEST_CONSTEXPR(match_v2di( + _mm_maskz_ternarylogic_epi64( + (__mmask8)0x0C, + ((__m128i)((__v2di){0x9, 0x9})), + ((__m128i)((__v2di){0x4, 0x4})), + ((__m128i)((__v2di){0x2, 0x2})), + (unsigned char)0xFE), + 0x0, 0x0)); +TEST_CONSTEXPR(match_v2di( + _mm_maskz_ternarylogic_epi64( + (__mmask8)0x05, + ((__m128i)((__v2di){0x9, 0x9})), + ((__m128i)((__v2di){0x4, 0x4})), + ((__m128i)((__v2di){0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x0)); __m256i test_mm256_ternarylogic_epi64(__m256i __A, __m256i __B, __m256i __C) { // CHECK-LABEL: test_mm256_ternarylogic_epi64 // CHECK: @llvm.x86.avx512.pternlog.q.256 return _mm256_ternarylogic_epi64(__A, __B, __C, 4); } +TEST_CONSTEXPR(match_v4di( + _mm256_ternarylogic_epi64( + ((__m256i)((__v4di){-0x1, 0x0, -0x1, 0x0})), + ((__m256i)((__v4di){0xB, 0xB, 0xB, 0xB})), + ((__m256i)((__v4di){0xC, 0xC, 0xC, 0xC})), + (unsigned char)0xCA), + 0xB, 0xC, 0xB, 0xC)); +TEST_CONSTEXPR(match_v4di( + _mm256_ternarylogic_epi64( + ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})), + ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})), + ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})), + (unsigned char)0xFE), + 0xF, 0xF, 0xF, 0xF)); +TEST_CONSTEXPR(match_v4di( + _mm256_ternarylogic_epi64( + ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})), + ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})), + ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x0, 0x0, 0x0)); __m256i test_mm256_mask_ternarylogic_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { // CHECK-LABEL: test_mm256_mask_ternarylogic_epi64 @@ -8395,6 +8623,30 @@ __m256i test_mm256_mask_ternarylogic_epi64(__m256i __A, __mmask8 __U, __m256i __ // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_ternarylogic_epi64(__A, __U, __B, __C, 4); } +TEST_CONSTEXPR(match_v4di( + _mm256_mask_ternarylogic_epi64( + ((__m256i)((__v4di){-0x1, 0x0, -0x1, 0x0})), + (__mmask8)0x33, + ((__m256i)((__v4di){0xB, 0xB, 0xB, 0xB})), + ((__m256i)((__v4di){0xC, 0xC, 0xC, 0xC})), + (unsigned char)0xCA), + 0xB, 0xC, -0x1, 0x0)); +TEST_CONSTEXPR(match_v4di( + _mm256_mask_ternarylogic_epi64( + ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})), + (__mmask8)0xCC, + ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})), + ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})), + (unsigned char)0xFE), + 0x9, 0x9, 0xF, 0xF)); +TEST_CONSTEXPR(match_v4di( + _mm256_mask_ternarylogic_epi64( + ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})), + (__mmask8)0x55, + ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})), + ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x9, 0x0, 0x9)); __m256i test_mm256_maskz_ternarylogic_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) { // CHECK-LABEL: test_mm256_maskz_ternarylogic_epi64 @@ -8402,6 +8654,31 @@ __m256i test_mm256_maskz_ternarylogic_epi64(__mmask8 __U, __m256i __A, __m256i _ // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> zeroinitializer return _mm256_maskz_ternarylogic_epi64(__U, __A, __B, __C, 4); } +TEST_CONSTEXPR(match_v4di( + _mm256_maskz_ternarylogic_epi64( + (__mmask8)0x33, + ((__m256i)((__v4di){-0x1, 0x0, -0x1, 0x0})), + ((__m256i)((__v4di){0xB, 0xB, 0xB, 0xB})), + ((__m256i)((__v4di){0xC, 0xC, 0xC, 0xC})), + (unsigned char)0xCA), + 0xB, 0xC, 0x0, 0x0)); +TEST_CONSTEXPR(match_v4di( + _mm256_maskz_ternarylogic_epi64( + (__mmask8)0xCC, + ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})), + ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})), + ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})), + (unsigned char)0xFE), + 0x0, 0x0, 0xF, 0xF)); +TEST_CONSTEXPR(match_v4di( + _mm256_maskz_ternarylogic_epi64( + (__mmask8)0x55, + ((__m256i)((__v4di){0x9, 0x9, 0x9, 0x9})), + ((__m256i)((__v4di){0x4, 0x4, 0x4, 0x4})), + ((__m256i)((__v4di){0x2, 0x2, 0x2, 0x2})), + (unsigned char)0x80), + 0x0, 0x0, 0x0, 0x0)); + __m256 test_mm256_shuffle_f32x4(__m256 __A, __m256 __B) { // CHECK-LABEL: test_mm256_shuffle_f32x4 // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> >From b943ad28ee5054777c7431237dd4561dc6bdfe33 Mon Sep 17 00:00:00 2001 From: Shawn <kimshaw...@icloud.com> Date: Tue, 16 Sep 2025 00:05:54 -0700 Subject: [PATCH 02/14] Apply suggestion from @tbaederr Co-authored-by: Timm Baeder <tbae...@redhat.com> --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 00466aa7ae663..cd376bee4f288 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2971,7 +2971,7 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC, const Pointer &Dst = S.Stk.peek<Pointer>(); - for (unsigned I = 0; I < DstLen; ++I) { + for (unsigned I = 0; I != DstLen; ++I) { APSInt a, b, c; INT_TYPE_SWITCH(DstElemT, { a = A.elem<T>(I).toAPSInt(); >From d2eb52bee393f449af35e49b7164013e896245c9 Mon Sep 17 00:00:00 2001 From: Shawn <kimshaw...@icloud.com> Date: Tue, 16 Sep 2025 00:06:30 -0700 Subject: [PATCH 03/14] Apply suggestion from @tbaederr Co-authored-by: Timm Baeder <tbae...@redhat.com> --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index cd376bee4f288..3c39c683805d2 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2984,7 +2984,7 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC, bool DstUnsigned = a.isUnsigned(); if (U[I]) { - for (unsigned Bit = 0; Bit < BitWidth; ++Bit) { + for (unsigned Bit = 0; Bit != BitWidth; ++Bit) { unsigned Idx = (a[Bit] << 2) | (b[Bit] << 1) | (c[Bit]); R.setBitVal(Bit, Imm[Idx]); } >From 26b535489fbc6a01391483274390359de08be6f5 Mon Sep 17 00:00:00 2001 From: Shawn <kimshaw...@icloud.com> Date: Tue, 16 Sep 2025 00:09:14 -0700 Subject: [PATCH 04/14] Apply suggestion from @tbaederr Co-authored-by: Timm Baeder <tbae...@redhat.com> --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 3c39c683805d2..8992c23a4fd6c 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3659,7 +3659,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_pternlogq128_mask: case X86::BI__builtin_ia32_pternlogq256_mask: case X86::BI__builtin_ia32_pternlogq512_mask: - return interp__builtin_pternlog(S, OpPC, Call, false); + return interp__builtin_pternlog(S, OpPC, Call, /*MaskZ=*/false); case X86::BI__builtin_ia32_pternlogd128_maskz: case X86::BI__builtin_ia32_pternlogd256_maskz: case X86::BI__builtin_ia32_pternlogd512_maskz: >From 2f1a3895d594e843a619c1fbfa44aff592d0dac0 Mon Sep 17 00:00:00 2001 From: Shawn <kimshaw...@icloud.com> Date: Tue, 16 Sep 2025 00:09:42 -0700 Subject: [PATCH 05/14] Apply suggestion from @tbaederr Co-authored-by: Timm Baeder <tbae...@redhat.com> --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 8992c23a4fd6c..ccf0a47558202 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3666,7 +3666,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_pternlogq128_maskz: case X86::BI__builtin_ia32_pternlogq256_maskz: case X86::BI__builtin_ia32_pternlogq512_maskz: - return interp__builtin_pternlog(S, OpPC, Call, true); + return interp__builtin_pternlog(S, OpPC, Call, /*MaskZ=*/true); case Builtin::BI__builtin_elementwise_fshl: return interp__builtin_elementwise_triop(S, OpPC, Call, llvm::APIntOps::fshl); >From f20253020ba0e4174a0812f040af2c6476335050 Mon Sep 17 00:00:00 2001 From: kimsh02 <kimshaw...@icloud.com> Date: Tue, 16 Sep 2025 00:47:53 -0700 Subject: [PATCH 06/14] Apply feedback: Use PascalCase --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 26 ++++++++++++------------ 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index ccf0a47558202..88a17df1b5e4a 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2972,32 +2972,32 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC, const Pointer &Dst = S.Stk.peek<Pointer>(); for (unsigned I = 0; I != DstLen; ++I) { - APSInt a, b, c; + APSInt ALane, BLane, CLane; INT_TYPE_SWITCH(DstElemT, { - a = A.elem<T>(I).toAPSInt(); - b = B.elem<T>(I).toAPSInt(); - c = C.elem<T>(I).toAPSInt(); + ALane = A.elem<T>(I).toAPSInt(); + BLane = B.elem<T>(I).toAPSInt(); + CLane = C.elem<T>(I).toAPSInt(); }); - unsigned BitWidth = a.getBitWidth(); - APInt R(BitWidth, 0); - bool DstUnsigned = a.isUnsigned(); + unsigned BitWidth = ALane.getBitWidth(); + APInt RLane(BitWidth, 0); + bool DstUnsigned = ALane.isUnsigned(); if (U[I]) { for (unsigned Bit = 0; Bit != BitWidth; ++Bit) { - unsigned Idx = (a[Bit] << 2) | (b[Bit] << 1) | (c[Bit]); - R.setBitVal(Bit, Imm[Idx]); + unsigned Idx = (ALane[Bit] << 2) | (BLane[Bit] << 1) | (CLane[Bit]); + RLane.setBitVal(Bit, Imm[Idx]); } INT_TYPE_SWITCH_NO_BOOL(DstElemT, { - Dst.elem<T>(I) = static_cast<T>(APSInt(R, DstUnsigned)); + Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned)); }); } else if (MaskZ) { - INT_TYPE_SWITCH_NO_BOOL(DstElemT, { - Dst.elem<T>(I) = static_cast<T>(APSInt(R, DstUnsigned)); + INT_TYPE_SWITCH_NO_BOOL(DstElemT, { /* Zeroes lane */ + Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned)); }); } else { INT_TYPE_SWITCH_NO_BOOL(DstElemT, - { Dst.elem<T>(I) = static_cast<T>(a); }); + { Dst.elem<T>(I) = static_cast<T>(ALane); }); } } Dst.initializeAllElements(); >From 5e76351fc76d0ae932ba8dbf1840416a0a96459d Mon Sep 17 00:00:00 2001 From: kimsh02 <kimshaw...@icloud.com> Date: Tue, 16 Sep 2025 00:53:17 -0700 Subject: [PATCH 07/14] Clang-format --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 88a17df1b5e4a..cbfeed1c5e5d5 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2992,7 +2992,7 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC, Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned)); }); } else if (MaskZ) { - INT_TYPE_SWITCH_NO_BOOL(DstElemT, { /* Zeroes lane */ + INT_TYPE_SWITCH_NO_BOOL(DstElemT, { // Zeroes lane Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned)); }); } else { >From 9a227c02ca6eb0ac052f91b8d14fbad3143e3051 Mon Sep 17 00:00:00 2001 From: kimsh02 <kimshaw...@icloud.com> Date: Wed, 17 Sep 2025 23:26:28 -0700 Subject: [PATCH 08/14] Apply feedback: Rebase and refactor lines --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index cbfeed1c5e5d5..be2eea5feb78e 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2961,10 +2961,10 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC, const VectorType *VecT = Call->getArg(0)->getType()->castAs<VectorType>(); unsigned DstLen = VecT->getNumElements(); - PrimType DstElemT = *S.getContext().classify(VecT->getElementType()); + const PrimType &DstElemT = *S.getContext().classify(VecT->getElementType()); - APSInt U = popToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(4))); - APSInt Imm = popToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(3))); + APSInt U = popToAPSInt(S, Call->getArg(4)); + APSInt Imm = popToAPSInt(S, Call->getArg(3)); const Pointer &C = S.Stk.pop<Pointer>(); const Pointer &B = S.Stk.pop<Pointer>(); const Pointer &A = S.Stk.pop<Pointer>(); >From 9cf5aef34c85ae148d76a04a8423790ee89c0f99 Mon Sep 17 00:00:00 2001 From: kimsh02 <kimshaw...@icloud.com> Date: Fri, 19 Sep 2025 14:41:05 -0700 Subject: [PATCH 09/14] Apply feedback: Rebase with main --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index be2eea5feb78e..e23d4798c6706 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3001,6 +3001,7 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC, } } Dst.initializeAllElements(); + return true; } bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, >From decd3b81ef2fa011f7cfbb3a8ac8cb4d5a3e27bc Mon Sep 17 00:00:00 2001 From: kimsh02 <kimshaw...@icloud.com> Date: Fri, 19 Sep 2025 14:47:39 -0700 Subject: [PATCH 10/14] Apply feedback: Rebase with main --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index e23d4798c6706..5b84783b0ff40 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2978,7 +2978,6 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC, BLane = B.elem<T>(I).toAPSInt(); CLane = C.elem<T>(I).toAPSInt(); }); - unsigned BitWidth = ALane.getBitWidth(); APInt RLane(BitWidth, 0); bool DstUnsigned = ALane.isUnsigned(); >From 3843b3a6258895953c00b484c9e98823c21a61ef Mon Sep 17 00:00:00 2001 From: kimsh02 <kimshaw...@icloud.com> Date: Mon, 22 Sep 2025 21:16:42 -0700 Subject: [PATCH 11/14] Upload failing testcase for now --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 10 ++- clang/test/CodeGen/X86/avx512f-builtins.c | 89 +++++++++++++---------- 2 files changed, 58 insertions(+), 41 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 5b84783b0ff40..f2af4a90c1f9a 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2960,7 +2960,7 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC, assert(Call->getNumArgs() == 5); const VectorType *VecT = Call->getArg(0)->getType()->castAs<VectorType>(); - unsigned DstLen = VecT->getNumElements(); + const unsigned DstLen = VecT->getNumElements(); const PrimType &DstElemT = *S.getContext().classify(VecT->getElementType()); APSInt U = popToAPSInt(S, Call->getArg(4)); @@ -2973,7 +2973,7 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC, for (unsigned I = 0; I != DstLen; ++I) { APSInt ALane, BLane, CLane; - INT_TYPE_SWITCH(DstElemT, { + INT_TYPE_SWITCH_NO_BOOL(DstElemT, { ALane = A.elem<T>(I).toAPSInt(); BLane = B.elem<T>(I).toAPSInt(); CLane = C.elem<T>(I).toAPSInt(); @@ -2984,7 +2984,11 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC, if (U[I]) { for (unsigned Bit = 0; Bit != BitWidth; ++Bit) { - unsigned Idx = (ALane[Bit] << 2) | (BLane[Bit] << 1) | (CLane[Bit]); + unsigned ABit = ALane[Bit]; + unsigned BBit = BLane[Bit]; + unsigned CBit = CLane[Bit]; + + unsigned Idx = (ABit << 2) | (BBit << 1) | (CBit); RLane.setBitVal(Bit, Imm[Idx]); } INT_TYPE_SWITCH_NO_BOOL(DstElemT, { diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 9fa450c68e503..223551233e88f 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -6251,21 +6251,21 @@ TEST_CONSTEXPR(match_v16si( ((__m512i)((__v16si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})), ((__m512i)((__v16si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})), ((__m512i)((__v16si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})), - (unsigned char)0xCA), + (unsigned char)0xCA), // A ? B : C 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC)); TEST_CONSTEXPR(match_v16si( _mm512_ternarylogic_epi32( ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), - (unsigned char)0xFE), + (unsigned char)0xFE), // A | B | C 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF)); TEST_CONSTEXPR(match_v16si( _mm512_ternarylogic_epi32( ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), - (unsigned char)0x80), + (unsigned char)0x80), // A & B & C 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)); __m512i test_mm512_mask_ternarylogic_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { @@ -6276,28 +6276,37 @@ __m512i test_mm512_mask_ternarylogic_epi32(__m512i __A, __mmask16 __U, __m512i _ } TEST_CONSTEXPR(match_v16si( _mm512_mask_ternarylogic_epi32( - ((__m512i)((__v16si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})), - (__mmask16)0x3333, - ((__m512i)((__v16si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})), - ((__m512i)((__v16si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})), - (unsigned char)0xCA), - 0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0, 0xB, 0xC, -0x1, 0x0)); + ((__m512i)((__v16si){0x1, 0x0, 0x2, 0x0, 0x3, 0x0, 0x4, 0x0, + 0x5, 0x0, 0x6, 0x0, 0x7, 0x0, 0x8, 0x0})), + (__mmask16)0xA55A, + ((__m512i)((__v16si){0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10, 0x11, + 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19})), + ((__m512i)((__v16si){0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, + 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10})), + (unsigned char)0xCA), // A ? B : C + 0x1, 0x2, 0x2, 0x4, 0x6, 0x0, 0x3, 0x0, 0x8, 0x0, 0xD, 0x0, 0x7, 0xE, 0x8, 0x10)); TEST_CONSTEXPR(match_v16si( _mm512_mask_ternarylogic_epi32( - ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), - (__mmask16)0xCCCC, - ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), - ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), - (unsigned char)0xFE), - 0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF, 0x9, 0x9, 0xF, 0xF)); + ((__m512i)((__v16si){0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, + 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF})), + (__mmask16)0x0F0F, + ((__m512i)((__v16si){0x1, 0x2, 0x4, 0x8, 0x1, 0x2, 0x4, 0x8, + 0x1, 0x2, 0x4, 0x8, 0x1, 0x2, 0x4, 0x8})), + ((__m512i)((__v16si){0x10, 0x20, 0x40, 0x80, 0x10, 0x20, 0x40, 0x80, + 0x10, 0x20, 0x40, 0x80, 0x10, 0x20, 0x40, 0x80})), + (unsigned char)0xFE), // A | B | C + 0x11, 0x23, 0x46, 0x8B, 0x4, 0x5, 0x6, 0x7, 0x19, 0x2B, 0x4E, 0x8B, 0xC, 0xD, 0xE, 0xF)); TEST_CONSTEXPR(match_v16si( _mm512_mask_ternarylogic_epi32( - ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), - (__mmask16)0x5555, - ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), - ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), - (unsigned char)0x80), - 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9, 0x0, 0x9)); + ((__m512i)((__v16si){0xF, 0x7, 0x3, 0x1, 0xF, 0x7, 0x3, 0x1, + 0xFF, 0xF, 0xF0, 0xAA, 0x55, 0xCC, 0x33, 0xFF})), + (__mmask16)0xAAAA, + ((__m512i)((__v16si){0xE, 0x7, 0x2, 0x1, 0xF, 0x0, 0x3, 0x0, + 0xF, 0xF0, 0xFF, 0x55, 0x55, 0x33, 0x33, 0xF})), + ((__m512i)((__v16si){0xD, 0x7, 0x0, 0x1, 0xF, 0x7, 0x0, 0x1, + 0xF0, 0xF, 0xF, 0xFF, 0xF, 0xCC, 0x33, 0xF0})), + (unsigned char)0x80), // A & B & C + 0xF, 0x7, 0x3, 0x1, 0xF, 0x0, 0x3, 0x0, 0xFF, 0x0, 0xF0, 0x0, 0x55, 0x0, 0x33, 0x0)); __m512i test_mm512_maskz_ternarylogic_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) { // CHECK-LABEL: test_mm512_maskz_ternarylogic_epi32 @@ -6311,7 +6320,7 @@ TEST_CONSTEXPR(match_v16si( ((__m512i)((__v16si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})), ((__m512i)((__v16si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})), ((__m512i)((__v16si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})), - (unsigned char)0xCA), + (unsigned char)0xCA), // A ? B : C 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0)); TEST_CONSTEXPR(match_v16si( _mm512_maskz_ternarylogic_epi32( @@ -6319,7 +6328,7 @@ TEST_CONSTEXPR(match_v16si( ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), - (unsigned char)0xFE), + (unsigned char)0xFE), // A | B | C 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF)); TEST_CONSTEXPR(match_v16si( _mm512_maskz_ternarylogic_epi32( @@ -6327,7 +6336,7 @@ TEST_CONSTEXPR(match_v16si( ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), - (unsigned char)0x80), + (unsigned char)0x80), // A & B & C 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)); __m512i test_mm512_ternarylogic_epi64(__m512i __A, __m512i __B, __m512i __C) { @@ -6337,25 +6346,29 @@ __m512i test_mm512_ternarylogic_epi64(__m512i __A, __m512i __B, __m512i __C) { } TEST_CONSTEXPR(match_v8di( _mm512_ternarylogic_epi64( + ((__m512i)((__v8di){0x1111, 0x2222, 0x3333, 0x4444, 0x5555, 0x6666, 0x7777, 0x8888})), + ((__m512i)((__v8di){0xAAAA, 0xBBBB, 0xCCCC, 0xDDDD, 0xEEEE, 0xFFFF, 0x1111, 0x2222})), ((__m512i)((__v8di){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})), - ((__m512i)((__v8di){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})), - ((__m512i)((__v8di){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})), - (unsigned char)0xCA), - 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC)); + (unsigned char)0xD8), // C ? B : A + 0xAAAA, 0x2222, 0xCCCC, 0x4444, 0xEEEE, 0x6666, 0x1111, 0x8888)); TEST_CONSTEXPR(match_v8di( _mm512_ternarylogic_epi64( - ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), - ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), - ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), - (unsigned char)0xFE), - 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF)); + ((__m512i)((__v8di){-0x1, 0x0, -0x1, 0x0, 0xF0F0, 0xFF, -0x5555555555555556, 0x5555555555555555})), + ((__m512i)((__v8di){0x1234, 0xFFFF, 0xFF, 0xF0F, 0x3333, 0xFF00, -0x5555555555555556, -0x0F0F0F0F0F0F0F10})), + ((__m512i)((__v8di){0xFFFF, 0x1234, 0xF0F, 0xFF00, 0xF0F0, 0x3333, 0x5555555555555555, 0x0F0F0F0F0F0F0F0})), + (unsigned char)0x8F), // ~A | (B & C) + 0x1234, -0x1, 0xF, -0x1, -0xC0C1, -0x100, 0x5555555555555555, -0x5505050505050506)); TEST_CONSTEXPR(match_v8di( _mm512_ternarylogic_epi64( - ((__m512i)((__v8di){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})), - ((__m512i)((__v8di){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})), - ((__m512i)((__v8di){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})), - (unsigned char)0x80), - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)); + ((__m512i)((__v8di){0x7FFFFFFFFFFFFFFF, 0x0, 0x00FF00FF00FF00FF, 0x0F0F0F0F0F0F0F0F, + 0x123456789ABCDEF0, 0x3333333333333333, 0x5555555555555555, 0x0123456789ABCDEF})), + ((__m512i)((__v8di){0x1111111111111111, 0x2222222222222222, 0xFFFFFFFF, -0x100000000, + 0x0, -0x3333333333333334, -0x0F0F0F0F0F0F0F10, -0x123456789ABCDF0})), + ((__m512i)((__v8di){0x2222222222222222, 0x1111111111111111, -0x1000000000000, 0xFFFFFFFF, + -0x1, 0x0, 0x0F0F0F0F0F0F0F0F, 0x0})), + (unsigned char)0xE0), // A & (B | C) + 0x3333333333333333, 0x0, 0x00FF000000FF00FF, 0x0F0F0F0F0F0F0F0F, + 0x123456789ABCDEF0, 0x0, 0x5555555555555555, 0x0)); __m512i test_mm512_mask_ternarylogic_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) { // CHECK-LABEL: test_mm512_mask_ternarylogic_epi64 >From 1d8e4bff57c99b8f7de8503d583501528c5fe9f0 Mon Sep 17 00:00:00 2001 From: kimsh02 <kimshaw...@icloud.com> Date: Wed, 24 Sep 2025 08:14:40 -0700 Subject: [PATCH 12/14] Save debug changes --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index f2af4a90c1f9a..bc9e5e0ab839e 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2962,9 +2962,10 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC, const VectorType *VecT = Call->getArg(0)->getType()->castAs<VectorType>(); const unsigned DstLen = VecT->getNumElements(); const PrimType &DstElemT = *S.getContext().classify(VecT->getElementType()); + const bool DstUnsigned = VecT->isUnsignedIntegerOrEnumerationType(); - APSInt U = popToAPSInt(S, Call->getArg(4)); - APSInt Imm = popToAPSInt(S, Call->getArg(3)); + APInt U = popToAPSInt(S, Call->getArg(4)); + APInt Imm = popToAPSInt(S, Call->getArg(3)); const Pointer &C = S.Stk.pop<Pointer>(); const Pointer &B = S.Stk.pop<Pointer>(); const Pointer &A = S.Stk.pop<Pointer>(); @@ -2972,7 +2973,9 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC, const Pointer &Dst = S.Stk.peek<Pointer>(); for (unsigned I = 0; I != DstLen; ++I) { - APSInt ALane, BLane, CLane; + APInt ALane; + APInt BLane; + APInt CLane; INT_TYPE_SWITCH_NO_BOOL(DstElemT, { ALane = A.elem<T>(I).toAPSInt(); BLane = B.elem<T>(I).toAPSInt(); @@ -2980,7 +2983,7 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC, }); unsigned BitWidth = ALane.getBitWidth(); APInt RLane(BitWidth, 0); - bool DstUnsigned = ALane.isUnsigned(); + // bool DstUnsigned = ALane.isUnsigned(); if (U[I]) { for (unsigned Bit = 0; Bit != BitWidth; ++Bit) { @@ -3000,7 +3003,7 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC, }); } else { INT_TYPE_SWITCH_NO_BOOL(DstElemT, - { Dst.elem<T>(I) = static_cast<T>(ALane); }); + { Dst.elem<T>(I) = static_cast<T>(APSInt(ALane, DstUnsigned)); }); } } Dst.initializeAllElements(); >From b0ae797a6e2aa3ed8af8a0852e6dab42ef1f499b Mon Sep 17 00:00:00 2001 From: kimsh02 <kimshaw...@icloud.com> Date: Thu, 25 Sep 2025 18:52:33 -0700 Subject: [PATCH 13/14] Save debug changes --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index bc9e5e0ab839e..18001737a783d 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -20,6 +20,10 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SipHash.h" +#include <iostream> +#include "llvm/Support/Debug.h" +#define DEBUG_TYPE "interp-builtin" + namespace clang { namespace interp { @@ -2959,6 +2963,9 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC, const CallExpr *Call, bool MaskZ) { assert(Call->getNumArgs() == 5); + // LLVM_DEBUG(llvm::dbgs() << "Debug\n"); + std::cout << "Debug here\n"; + const VectorType *VecT = Call->getArg(0)->getType()->castAs<VectorType>(); const unsigned DstLen = VecT->getNumElements(); const PrimType &DstElemT = *S.getContext().classify(VecT->getElementType()); >From 347bfb5685b8063fd97199c262c9c09a3f3f707a Mon Sep 17 00:00:00 2001 From: kimsh02 <kimshaw...@icloud.com> Date: Sat, 27 Sep 2025 21:30:33 -0700 Subject: [PATCH 14/14] Save debug changes --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 23 +++++++------- clang/test/CodeGen/X86/avx512f-builtins.c | 5 +-- clang/test/CodeGen/X86/avx512vl-builtins.c | 36 +++++++++++++++++++--- 3 files changed, 44 insertions(+), 20 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 18001737a783d..b06016f5f54c3 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -20,10 +20,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SipHash.h" -#include <iostream> -#include "llvm/Support/Debug.h" -#define DEBUG_TYPE "interp-builtin" - namespace clang { namespace interp { @@ -2963,16 +2959,13 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC, const CallExpr *Call, bool MaskZ) { assert(Call->getNumArgs() == 5); - // LLVM_DEBUG(llvm::dbgs() << "Debug\n"); - std::cout << "Debug here\n"; - const VectorType *VecT = Call->getArg(0)->getType()->castAs<VectorType>(); const unsigned DstLen = VecT->getNumElements(); const PrimType &DstElemT = *S.getContext().classify(VecT->getElementType()); - const bool DstUnsigned = VecT->isUnsignedIntegerOrEnumerationType(); + const bool DstUnsigned = VecT->getElementType()->isUnsignedIntegerOrEnumerationType(); - APInt U = popToAPSInt(S, Call->getArg(4)); - APInt Imm = popToAPSInt(S, Call->getArg(3)); + const APInt U = popToAPSInt(S, Call->getArg(4)); + const APInt Imm = popToAPSInt(S, Call->getArg(3)); const Pointer &C = S.Stk.pop<Pointer>(); const Pointer &B = S.Stk.pop<Pointer>(); const Pointer &A = S.Stk.pop<Pointer>(); @@ -2988,9 +2981,17 @@ static bool interp__builtin_pternlog(InterpState &S, CodePtr OpPC, BLane = B.elem<T>(I).toAPSInt(); CLane = C.elem<T>(I).toAPSInt(); }); - unsigned BitWidth = ALane.getBitWidth(); + const unsigned BitWidth = ALane.getBitWidth(); APInt RLane(BitWidth, 0); // bool DstUnsigned = ALane.isUnsigned(); + + #define DEBUG_TYPE "ptern" + LLVM_DEBUG({ + ALane.print(llvm::dbgs(), false); llvm::dbgs() << "\n"; + BLane.print(llvm::dbgs(), false); llvm::dbgs() << "\n"; + CLane.print(llvm::dbgs(), false); llvm::dbgs() << "\n"; + RLane.print(llvm::dbgs(), false); llvm::dbgs() << "\n"; + }); if (U[I]) { for (unsigned Bit = 0; Bit != BitWidth; ++Bit) { diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 223551233e88f..41be6de64ed1a 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -3,10 +3,7 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s -// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s -// RUN: %clang_cc1 -x c -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s -// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s -// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s + #include <immintrin.h> #include "builtin_test_helpers.h" diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index d29045af6e303..8501ab7b306b7 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -1,8 +1,7 @@ // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s -// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s -// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s + #include <immintrin.h> #include "builtin_test_helpers.h" @@ -8362,7 +8361,7 @@ TEST_CONSTEXPR(match_v4si( (__mmask8)0x03, ((__m128i)((__v4si){0xB, 0xB, 0xB, 0xB})), ((__m128i)((__v4si){0xC, 0xC, 0xC, 0xC })), - (unsigned char)0xCA), + (unsigned char)0xCA), // A ? B : C 0xB, 0xC, -0x1, 0x0)); TEST_CONSTEXPR(match_v4si( _mm_mask_ternarylogic_epi32( @@ -8370,7 +8369,7 @@ TEST_CONSTEXPR(match_v4si( (__mmask8)0x0C, ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})), ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})), - (unsigned char)0xFE), + (unsigned char)0xFE), // A | B | C 0x9, 0x9, 0xF, 0xF)); TEST_CONSTEXPR(match_v4si( _mm_mask_ternarylogic_epi32( @@ -8378,7 +8377,7 @@ TEST_CONSTEXPR(match_v4si( (__mmask8)0x05, ((__m128i)((__v4si){0x4, 0x4, 0x4, 0x4})), ((__m128i)((__v4si){0x2, 0x2, 0x2, 0x2})), - (unsigned char)0x80), + (unsigned char)0x80), // A & B & C 0x0, 0x9, 0x0, 0x9)); __m128i test_mm_maskz_ternarylogic_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) { @@ -8387,6 +8386,33 @@ __m128i test_mm_maskz_ternarylogic_epi32(__mmask8 __U, __m128i __A, __m128i __B, // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> zeroinitializer return _mm_maskz_ternarylogic_epi32(__U, __A, __B, __C, 4); } +// B ? A : C imm = 0xE2 (Idx = (A<<2)|(B<<1)|C per VPTERNLOG) +TEST_CONSTEXPR(match_v4si( + _mm_maskz_ternarylogic_epi32( + (__mmask8)0x0B, + ((__m128i)((__v4si){(int)0xDEADBEEF, 0, (int)0xFFFFFFFF, 0x13579BDF})), // A + ((__m128i)((__v4si){(int)0xFFFFFFFF, 0, (int)0xFFFFFFFF, 0})), // B + ((__m128i)((__v4si){(int)0xCAFEBABE, (int)0xFFFFFFFF, 0, 0x2468ACE0})), // C + (unsigned char)0xE2), + (int)0xDEADBEEF, (int)0xFFFFFFFF, 0, 0x2468ACE0)); + // ~(A & B) | ~(B & C) imm = 0x7F +TEST_CONSTEXPR(match_v4si( + _mm_maskz_ternarylogic_epi32( + (__mmask8)0x0C, + ((__m128i)((__v4si){0, (int)0xFFFFFFFF, (int)0xAAAAAAAA, 0x55555555})), // A + ((__m128i)((__v4si){(int)0xFFFFFFFF, 0, (int)0xFFFFFFFF, (int)0xFFFFFFFF})), // B + ((__m128i)((__v4si){(int)0xF0F0F0F0, 0, 0, (int)0xFFFFFFFF})), // C + (unsigned char)0x7F), + 0, 0, (int)0xFFFFFFFF, (int)0xAAAAAAAA)); + // ~A | ~B | C imm = 0xBF +TEST_CONSTEXPR(match_v4si( + _mm_maskz_ternarylogic_epi32( + (__mmask8)0x05, + ((__m128i)((__v4si){(int)0xFFFFFFFF, 0, 0x12345678, 0})), // A + ((__m128i)((__v4si){0, 0, 0x0000FFFF, (int)0xFFFFFFFF})), // B + ((__m128i)((__v4si){0, 0, 0x0000000F, 0})), // C + (unsigned char)0xBF), + (int)0xFFFFFFFF, 0, (int)0xFFFFA98F, 0)); TEST_CONSTEXPR(match_v4si( _mm_maskz_ternarylogic_epi32( (__mmask8)0x03, _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits