https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/156480
Followup to #154780 >From 2bed7f329c50218cb2173c555a9f3ffae31e4241 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim <[email protected]> Date: Tue, 2 Sep 2025 16:51:42 +0100 Subject: [PATCH] [X86] Allow AVX512 512-bit variants of AVX2 per-element shift intrinsics to be used in constexpr Followup to #154780 --- clang/include/clang/Basic/BuiltinsX86.td | 7 ++++--- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 3 +++ clang/lib/AST/ExprConstant.cpp | 6 ++++++ clang/lib/Headers/avx512fintrin.h | 18 +++++++++--------- clang/test/CodeGen/X86/avx512f-builtins.c | 9 +++++++++ 5 files changed, 31 insertions(+), 12 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index acd8f70c4a5f2..0d44e78f879b9 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -1993,6 +1993,10 @@ let Features = "avx512dq,evex512", Attributes = [NoThrow, Const, RequiredVectorW } let Features = "avx512f,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { + def psllv16si : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">; + def psrav16si : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">; + def psrlv16si : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">; + def prold512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">; def prord512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">; def prolq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">; @@ -2422,15 +2426,12 @@ let Features = "avx512vl", let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { def pslld512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<4, int>)">; def psllq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<2, long long int>)">; - def psllv16si : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">; def psllv8di : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">; def psrad512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<4, int>)">; def psraq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<2, long long int>)">; - def psrav16si : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">; def psrav8di : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">; def psrld512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<4, int>)">; def psrlq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<2, long long int>)">; - def psrlv16si : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">; def psrlv8di : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">; def pternlogd512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">; def pternlogd512_maskz : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">; diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index e05b1a88c15bb..8c2b71160f7f3 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3268,6 +3268,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case clang::X86::BI__builtin_ia32_psllv4di: case clang::X86::BI__builtin_ia32_psllv4si: case clang::X86::BI__builtin_ia32_psllv8si: + case clang::X86::BI__builtin_ia32_psllv16si: case clang::X86::BI__builtin_ia32_psllwi128: case clang::X86::BI__builtin_ia32_psllwi256: case clang::X86::BI__builtin_ia32_psllwi512: @@ -3287,6 +3288,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case clang::X86::BI__builtin_ia32_psrav4si: case clang::X86::BI__builtin_ia32_psrav8si: + case clang::X86::BI__builtin_ia32_psrav16si: case clang::X86::BI__builtin_ia32_psrawi128: case clang::X86::BI__builtin_ia32_psrawi256: case clang::X86::BI__builtin_ia32_psrawi512: @@ -3308,6 +3310,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case clang::X86::BI__builtin_ia32_psrlv4di: case clang::X86::BI__builtin_ia32_psrlv4si: case clang::X86::BI__builtin_ia32_psrlv8si: + case clang::X86::BI__builtin_ia32_psrlv16si: case clang::X86::BI__builtin_ia32_psrlwi128: case clang::X86::BI__builtin_ia32_psrlwi256: case clang::X86::BI__builtin_ia32_psrlwi512: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index b4f1e76187e25..388cc55814280 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11627,12 +11627,15 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case clang::X86::BI__builtin_ia32_psllv4di: case clang::X86::BI__builtin_ia32_psllv4si: case clang::X86::BI__builtin_ia32_psllv8si: + case clang::X86::BI__builtin_ia32_psllv16si: case clang::X86::BI__builtin_ia32_psrav4si: case clang::X86::BI__builtin_ia32_psrav8si: + case clang::X86::BI__builtin_ia32_psrav16si: case clang::X86::BI__builtin_ia32_psrlv2di: case clang::X86::BI__builtin_ia32_psrlv4di: case clang::X86::BI__builtin_ia32_psrlv4si: case clang::X86::BI__builtin_ia32_psrlv8si: + case clang::X86::BI__builtin_ia32_psrlv16si: case clang::X86::BI__builtin_ia32_psllwi128: case clang::X86::BI__builtin_ia32_pslldi128: @@ -11763,6 +11766,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case clang::X86::BI__builtin_ia32_psllv4di: case clang::X86::BI__builtin_ia32_psllv4si: case clang::X86::BI__builtin_ia32_psllv8si: + case clang::X86::BI__builtin_ia32_psllv16si: if (RHS.uge(RHS.getBitWidth())) { ResultElements.push_back( APValue(APSInt(APInt::getZero(RHS.getBitWidth()), DestUnsigned))); @@ -11773,6 +11777,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { break; case clang::X86::BI__builtin_ia32_psrav4si: case clang::X86::BI__builtin_ia32_psrav8si: + case clang::X86::BI__builtin_ia32_psrav16si: if (RHS.uge(RHS.getBitWidth())) { ResultElements.push_back( APValue(APSInt(LHS.ashr(RHS.getBitWidth() - 1), DestUnsigned))); @@ -11785,6 +11790,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case clang::X86::BI__builtin_ia32_psrlv4di: case clang::X86::BI__builtin_ia32_psrlv4si: case clang::X86::BI__builtin_ia32_psrlv8si: + case clang::X86::BI__builtin_ia32_psrlv16si: if (RHS.uge(RHS.getBitWidth())) { ResultElements.push_back( APValue(APSInt(APInt::getZero(RHS.getBitWidth()), DestUnsigned))); diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 5222141d21606..18215c1924c08 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -5644,13 +5644,13 @@ _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sllv_epi32(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -5658,7 +5658,7 @@ _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -5732,13 +5732,13 @@ _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srav_epi32(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -5746,7 +5746,7 @@ _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -5820,13 +5820,13 @@ _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_srlv_epi32(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -5834,7 +5834,7 @@ _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index eb3b28390947f..1d280d490f6a7 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -5918,6 +5918,7 @@ __m512i test_mm512_sllv_epi32(__m512i __X, __m512i __Y) { // CHECK: @llvm.x86.avx512.psllv.d.512 return _mm512_sllv_epi32(__X, __Y); } +TEST_CONSTEXPR(match_v16si(_mm512_sllv_epi32((__m512i)(__v16si){1, -2, 3, -4, 5, -6, 7, -8, 1, -2, 3, -4, 5, -6, 7, -8}, (__m512i)(__v16si){1, 2, 3, 4, -17, 31, 33, 29, 29, 33, 31, -17, 4, 3, 2, 1}), 2, -8, 24, -64, 0, 0, 0, 0, 536870912, 0, -2147483648, 0, 80, -48, 28, -16)); __m512i test_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_mask_sllv_epi32 @@ -5925,6 +5926,7 @@ __m512i test_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m5 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_sllv_epi32(__W, __U, __X, __Y); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_sllv_epi32((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x912A, (__m512i)(__v16si){1, -2, 3, -4, 5, -6, 7, -8, 1, -2, 3, -4, 5, -6, 7, -8}, (__m512i)(__v16si){1, 2, 3, 4, -17, 31, 33, 29, 29, 33, 31, -17, 4, 3, 2, 1}), 99, -8, 99, -64, 99, 0, 99, 99, 536870912, 99, 99, 99, 80, 99, 99, -16)); __m512i test_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_maskz_sllv_epi32 @@ -5932,6 +5934,7 @@ __m512i test_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_sllv_epi32(__U, __X, __Y); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_sllv_epi32(0x3C8F, (__m512i)(__v16si){1, -2, 3, -4, 5, -6, 7, -8, 1, -2, 3, -4, 5, -6, 7, -8}, (__m512i)(__v16si){1, 2, 3, 4, -17, 31, 33, 29, 29, 33, 31, -17, 4, 3, 2, 1}), 2, -8, 24, -64, 0, 0, 0, 0, 0, 0, -2147483648, 0, 80, -48, 0, 0)); __m512i test_mm512_sllv_epi64(__m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_sllv_epi64 @@ -5998,6 +6001,7 @@ __m512i test_mm512_srav_epi32(__m512i __X, __m512i __Y) { // CHECK: @llvm.x86.avx512.psrav.d.512 return _mm512_srav_epi32(__X, __Y); } +TEST_CONSTEXPR(match_v16si(_mm512_srav_epi32((__m512i)(__v16si){1, -2, 3, -4, 5, -6, 7, -8, 1, -2, 3, -4, 5, -6, 7, -8}, (__m512i)(__v16si){1, 2, 3, 4, -17, 31, 33, 29, 29, 33, 31, -17, 4, 3, 2, 1}), 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 1, -4)); __m512i test_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_mask_srav_epi32 @@ -6005,6 +6009,7 @@ __m512i test_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m5 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_srav_epi32(__W, __U, __X, __Y); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_srav_epi32((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x912A, (__m512i)(__v16si){1, -2, 3, -4, 5, -6, 7, -8, 1, -2, 3, -4, 5, -6, 7, -8}, (__m512i)(__v16si){1, 2, 3, 4, -17, 31, 33, 29, 29, 33, 31, -17, 4, 3, 2, 1}), 99, -1, 99, -1, 99, -1, 99, 99, 0, 99, 99, 99, 0, 99, 99, -4)); __m512i test_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_maskz_srav_epi32 @@ -6012,6 +6017,7 @@ __m512i test_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_srav_epi32(__U, __X, __Y); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_srav_epi32(0x3C8F, (__m512i)(__v16si){1, -2, 3, -4, 5, -6, 7, -8, 1, -2, 3, -4, 5, -6, 7, -8}, (__m512i)(__v16si){1, 2, 3, 4, -17, 31, 33, 29, 29, 33, 31, -17, 4, 3, 2, 1}), 0, -1, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, 0, -1, 0, 0)); __m512i test_mm512_srav_epi64(__m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_srav_epi64 @@ -6078,6 +6084,7 @@ __m512i test_mm512_srlv_epi32(__m512i __X, __m512i __Y) { // CHECK: @llvm.x86.avx512.psrlv.d.512 return _mm512_srlv_epi32(__X, __Y); } +TEST_CONSTEXPR(match_v16si(_mm512_srlv_epi32((__m512i)(__v16si){1, -2, 3, -4, 5, -6, 7, -8, 1, -2, 3, -4, 5, -6, 7, -8}, (__m512i)(__v16si){1, 2, 3, 4, -17, 31, 33, 29, 29, 33, 31, -17, 4, 3, 2, 1}), 0, 1073741823, 0, 268435455, 0, 1, 0, 7, 0, 0, 0, 0, 0, 536870911, 1, 2147483644)); __m512i test_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_mask_srlv_epi32 @@ -6085,6 +6092,7 @@ __m512i test_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m5 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_srlv_epi32(__W, __U, __X, __Y); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_srlv_epi32((__m512i)(__v16si){99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x912A, (__m512i)(__v16si){1, -2, 3, -4, 5, -6, 7, -8, 1, -2, 3, -4, 5, -6, 7, -8}, (__m512i)(__v16si){1, 2, 3, 4, -17, 31, 33, 29, 29, 33, 31, -17, 4, 3, 2, 1}), 99, 1073741823, 99, 268435455, 99, 1, 99, 99, 0, 99, 99, 99, 0, 99, 99, 2147483644)); __m512i test_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_maskz_srlv_epi32 @@ -6092,6 +6100,7 @@ __m512i test_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_srlv_epi32(__U, __X, __Y); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_srlv_epi32(0x3C8F, (__m512i)(__v16si){1, -2, 3, -4, 5, -6, 7, -8, 1, -2, 3, -4, 5, -6, 7, -8}, (__m512i)(__v16si){1, 2, 3, 4, -17, 31, 33, 29, 29, 33, 31, -17, 4, 3, 2, 1}), 0, 1073741823, 0, 268435455, 0, 0, 0, 7, 0, 0, 0, 0, 0, 536870911, 0, 0)); __m512i test_mm512_srlv_epi64(__m512i __X, __m512i __Y) { // CHECK-LABEL: test_mm512_srlv_epi64 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
